1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/CodeGen/DAGCombine.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/RuntimeLibcallUtil.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/CodeGenTypes/MachineValueType.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/RuntimeLibcalls.h"
49#include "llvm/IR/Type.h"
50#include "llvm/Support/Alignment.h"
51#include "llvm/Support/AtomicOrdering.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/Compiler.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownFPClass.h"
56#include <algorithm>
57#include <cassert>
58#include <climits>
59#include <cstdint>
60#include <iterator>
61#include <map>
62#include <string>
63#include <utility>
64#include <vector>
65
66namespace llvm {
67
68class AssumptionCache;
69class CCState;
70class CCValAssign;
71enum class ComplexDeinterleavingOperation;
72enum class ComplexDeinterleavingRotation;
73class Constant;
74class FastISel;
75class FunctionLoweringInfo;
76class GlobalValue;
77class Loop;
78class GISelValueTracking;
79class IntrinsicInst;
80class IRBuilderBase;
81struct KnownBits;
82class LLVMContext;
83class MachineBasicBlock;
84class MachineFunction;
85class MachineInstr;
86class MachineJumpTableInfo;
87class MachineLoop;
88class MachineRegisterInfo;
89class MCContext;
90class MCExpr;
91class Module;
92class ProfileSummaryInfo;
93class TargetLibraryInfo;
94class TargetMachine;
95class TargetRegisterClass;
96class TargetRegisterInfo;
97class TargetTransformInfo;
98class Value;
99class VPIntrinsic;
100
101namespace Sched {
102
103enum Preference : uint8_t {
104 None, // No preference
105 Source, // Follow source order.
106 RegPressure, // Scheduling for lowest register pressure.
107 Hybrid, // Scheduling for both latency and register pressure.
108 ILP, // Scheduling for ILP in low register pressure mode.
109 VLIW, // Scheduling for VLIW targets.
110 Fast, // Fast suboptimal list scheduling
111 Linearize, // Linearize DAG, no scheduling
112 Last = Linearize // Marker for the last Sched::Preference
113};
114
115} // end namespace Sched
116
117// MemOp models a memory operation, either memset or memcpy/memmove.
118struct MemOp {
119private:
120 // Shared
121 uint64_t Size;
122 bool DstAlignCanChange; // true if destination alignment can satisfy any
123 // constraint.
124 Align DstAlign; // Specified alignment of the memory operation.
125
126 bool AllowOverlap;
127 // memset only
  bool IsMemset;     // If set, this memory operation is a memset.
  bool ZeroMemset;   // If set, the memset fills the destination with zeros.
130 // memcpy only
131 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
132 // constant so it does not need to be loaded.
133 Align SrcAlign; // Inferred alignment of the source or default value if the
134 // memory operation does not need to load the value.
135public:
136 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
137 Align SrcAlign, bool IsVolatile,
138 bool MemcpyStrSrc = false) {
139 MemOp Op;
140 Op.Size = Size;
141 Op.DstAlignCanChange = DstAlignCanChange;
142 Op.DstAlign = DstAlign;
143 Op.AllowOverlap = !IsVolatile;
144 Op.IsMemset = false;
145 Op.ZeroMemset = false;
146 Op.MemcpyStrSrc = MemcpyStrSrc;
147 Op.SrcAlign = SrcAlign;
148 return Op;
149 }
150
151 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
152 bool IsZeroMemset, bool IsVolatile) {
153 MemOp Op;
154 Op.Size = Size;
155 Op.DstAlignCanChange = DstAlignCanChange;
156 Op.DstAlign = DstAlign;
157 Op.AllowOverlap = !IsVolatile;
158 Op.IsMemset = true;
159 Op.ZeroMemset = IsZeroMemset;
160 Op.MemcpyStrSrc = false;
161 return Op;
162 }
163
164 uint64_t size() const { return Size; }
165 Align getDstAlign() const {
166 assert(!DstAlignCanChange);
167 return DstAlign;
168 }
169 bool isFixedDstAlign() const { return !DstAlignCanChange; }
170 bool allowOverlap() const { return AllowOverlap; }
171 bool isMemset() const { return IsMemset; }
172 bool isMemcpy() const { return !IsMemset; }
173 bool isMemcpyWithFixedDstAlign() const {
174 return isMemcpy() && !DstAlignCanChange;
175 }
176 bool isZeroMemset() const { return isMemset() && ZeroMemset; }
177 bool isMemcpyStrSrc() const {
178 assert(isMemcpy() && "Must be a memcpy");
179 return MemcpyStrSrc;
180 }
181 Align getSrcAlign() const {
182 assert(isMemcpy() && "Must be a memcpy");
183 return SrcAlign;
184 }
185 bool isSrcAligned(Align AlignCheck) const {
    return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
187 }
188 bool isDstAligned(Align AlignCheck) const {
    return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
190 }
191 bool isAligned(Align AlignCheck) const {
192 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
193 }
194};
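
// Example (illustrative only; the values are arbitrary): describing a 32-byte,
// non-volatile memcpy request and querying its alignment properties.
//
//   MemOp Op = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/false,
//                          /*DstAlign=*/Align(16), /*SrcAlign=*/Align(8),
//                          /*IsVolatile=*/false);
//   bool BothAligned = Op.isAligned(Align(8)); // true: src >= 8, dst >= 8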
195
196/// This base class for TargetLowering contains the SelectionDAG-independent
197/// parts that can be used from the rest of CodeGen.
198class LLVM_ABI TargetLoweringBase {
199public:
200 /// This enum indicates whether operations are valid for a target, and if not,
201 /// what action should be used to make them valid.
202 enum LegalizeAction : uint8_t {
203 Legal, // The target natively supports this operation.
204 Promote, // This operation should be executed in a larger type.
205 Expand, // Try to expand this to other ops, otherwise use a libcall.
206 LibCall, // Don't try to expand this to other ops, always use a libcall.
207 Custom // Use the LowerOperation hook to implement custom lowering.
208 };
209
  /// This enum indicates whether types are legal for a target, and if not,
  /// what action should be used to make them valid.
212 enum LegalizeTypeAction : uint8_t {
213 TypeLegal, // The target natively supports this type.
214 TypePromoteInteger, // Replace this integer with a larger one.
215 TypeExpandInteger, // Split this integer into two of half the size.
216 TypeSoftenFloat, // Convert this float to a same size integer type.
217 TypeExpandFloat, // Split this float into two of half the size.
218 TypeScalarizeVector, // Replace this one-element vector with its element.
219 TypeSplitVector, // Split this vector into two of half the size.
220 TypeWidenVector, // This vector should be widened into a larger vector.
221 TypePromoteFloat, // Replace this float with a larger one.
222 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
223 TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
224 // While it is theoretically possible to
225 // legalize operations on scalable types with a
226 // loop that handles the vscale * #lanes of the
227 // vector, this is non-trivial at SelectionDAG
228 // level and these types are better to be
229 // widened or promoted.
230 };
231
232 /// LegalizeKind holds the legalization kind that needs to happen to EVT
233 /// in order to type-legalize it.
234 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
235
236 /// Enum that describes how the target represents true/false values.
237 enum BooleanContent {
238 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
239 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
240 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
241 };
242
243 /// Enum that describes what type of support for selects the target has.
244 enum SelectSupportKind {
245 ScalarValSelect, // The target supports scalar selects (ex: cmov).
246 ScalarCondVectorVal, // The target supports selects with a scalar condition
247 // and vector values (ex: cmov).
248 VectorMaskSelect // The target supports vector selects with a vector
249 // mask (ex: x86 blends).
250 };
251
252 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
253 /// to, if at all. Exists because different targets have different levels of
254 /// support for these atomic instructions, and also have different options
255 /// w.r.t. what they should expand to.
256 enum class AtomicExpansionKind {
257 None, // Don't expand the instruction.
258 CastToInteger, // Cast the atomic instruction to another type, e.g. from
259 // floating-point to integer type.
    LLSC,      // Expand the instruction into load-linked/store-conditional;
               // used by ARM/AArch64.
262 LLOnly, // Expand the (load) instruction into just a load-linked, which has
263 // greater atomic guarantees than a normal load.
264 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
265 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
266 BitTestIntrinsic, // Use a target-specific intrinsic for special bit
267 // operations; used by X86.
    CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                       // operations; used by X86.
270 Expand, // Generic expansion in terms of other atomic operations.
271
272 // Rewrite to a non-atomic form for use in a known non-preemptible
273 // environment.
274 NotAtomic
275 };
276
277 /// Enum that specifies when a multiplication should be expanded.
278 enum class MulExpansionKind {
279 Always, // Always expand the instruction.
280 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
281 // or custom.
282 };
283
284 /// Enum that specifies when a float negation is beneficial.
285 enum class NegatibleCost {
286 Cheaper = 0, // Negated expression is cheaper.
287 Neutral = 1, // Negated expression has the same cost.
288 Expensive = 2 // Negated expression is more expensive.
289 };
290
291 /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
292 /// (setcc ...)).
293 enum AndOrSETCCFoldKind : uint8_t {
294 None = 0, // No fold is preferable.
295 AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
296 NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
297 ABS = 4, // Fold with `llvm.abs` op is preferable.
298 };
299
300 class ArgListEntry {
301 public:
302 Value *Val = nullptr;
303 SDValue Node = SDValue();
304 Type *Ty = nullptr;
305 bool IsSExt : 1;
306 bool IsZExt : 1;
307 bool IsNoExt : 1;
308 bool IsInReg : 1;
309 bool IsSRet : 1;
310 bool IsNest : 1;
311 bool IsByVal : 1;
312 bool IsByRef : 1;
313 bool IsInAlloca : 1;
314 bool IsPreallocated : 1;
315 bool IsReturned : 1;
316 bool IsSwiftSelf : 1;
317 bool IsSwiftAsync : 1;
318 bool IsSwiftError : 1;
319 bool IsCFGuardTarget : 1;
320 MaybeAlign Alignment = std::nullopt;
321 Type *IndirectType = nullptr;
322
323 ArgListEntry()
324 : IsSExt(false), IsZExt(false), IsNoExt(false), IsInReg(false),
325 IsSRet(false), IsNest(false), IsByVal(false), IsByRef(false),
326 IsInAlloca(false), IsPreallocated(false), IsReturned(false),
327 IsSwiftSelf(false), IsSwiftAsync(false), IsSwiftError(false),
328 IsCFGuardTarget(false) {}
329
330 LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx);
331 };
332 using ArgListTy = std::vector<ArgListEntry>;
333
334 static ISD::NodeType getExtendForContent(BooleanContent Content) {
335 switch (Content) {
336 case UndefinedBooleanContent:
337 // Extend by adding rubbish bits.
338 return ISD::ANY_EXTEND;
339 case ZeroOrOneBooleanContent:
340 // Extend by adding zero bits.
341 return ISD::ZERO_EXTEND;
342 case ZeroOrNegativeOneBooleanContent:
343 // Extend by copying the sign bit.
344 return ISD::SIGN_EXTEND;
345 }
346 llvm_unreachable("Invalid content kind");
347 }
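
  // For example, under ZeroOrNegativeOneBooleanContent (all bits equal to bit
  // 0), a boolean must be widened by copying its sign bit:
  //
  //   getExtendForContent(ZeroOrNegativeOneBooleanContent) == ISD::SIGN_EXTEND
  //   getExtendForContent(ZeroOrOneBooleanContent)         == ISD::ZERO_EXTEND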
348
349 explicit TargetLoweringBase(const TargetMachine &TM);
350 TargetLoweringBase(const TargetLoweringBase &) = delete;
351 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
352 virtual ~TargetLoweringBase();
353
  /// Return true if the target supports strict float operations.
355 bool isStrictFPEnabled() const {
356 return IsStrictFPEnabled;
357 }
358
359protected:
360 /// Initialize all of the actions to default values.
361 void initActions();
362
363public:
364 const TargetMachine &getTargetMachine() const { return TM; }
365
366 virtual bool useSoftFloat() const { return false; }
367
368 /// Return the pointer type for the given address space, defaults to
369 /// the pointer type from the data layout.
370 /// FIXME: The default needs to be removed once all the code is updated.
371 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
373 }
374
375 /// Return the in-memory pointer type for the given address space, defaults to
376 /// the pointer type from the data layout.
377 /// FIXME: The default needs to be removed once all the code is updated.
378 virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
380 }
381
382 /// Return the type for frame index, which is determined by
383 /// the alloca address space specified through the data layout.
384 MVT getFrameIndexTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getAllocaAddrSpace());
386 }
387
388 /// Return the type for code pointers, which is determined by the program
389 /// address space specified through the data layout.
390 MVT getProgramPointerTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getProgramAddressSpace());
392 }
393
394 /// Return the type for operands of fence.
395 /// TODO: Let fence operands be of i32 type and remove this.
396 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
397 return getPointerTy(DL);
398 }
399
400 /// Return the type to use for a scalar shift opcode, given the shifted amount
401 /// type. Targets should return a legal type if the input type is legal.
402 /// Targets can return a type that is too small if the input type is illegal.
403 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
404
405 /// Returns the type for the shift amount of a shift opcode. For vectors,
406 /// returns the input type. For scalars, calls getScalarShiftAmountTy.
  /// If the type returned by getScalarShiftAmountTy cannot represent all
  /// possible shift amounts, returns MVT::i32.
409 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const;
410
411 /// Return the preferred type to use for a shift opcode, given the shifted
412 /// amount type is \p ShiftValueTy.
413 LLVM_READONLY
414 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
415 return ShiftValueTy;
416 }
417
  /// Returns the type to be used for the index operand of vector operations.
  /// By default we assume it will have the same size as an address space 0
  /// pointer.
421 virtual unsigned getVectorIdxWidth(const DataLayout &DL) const {
    return DL.getPointerSizeInBits(0);
423 }
424
425 /// Returns the type to be used for the index operand of:
426 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
427 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
428 MVT getVectorIdxTy(const DataLayout &DL) const {
    return MVT::getIntegerVT(getVectorIdxWidth(DL));
430 }
431
432 /// Returns the type to be used for the index operand of:
433 /// G_INSERT_VECTOR_ELT, G_EXTRACT_VECTOR_ELT,
434 /// G_INSERT_SUBVECTOR, and G_EXTRACT_SUBVECTOR
435 LLT getVectorIdxLLT(const DataLayout &DL) const {
    return LLT::scalar(getVectorIdxWidth(DL));
437 }
438
439 /// Returns the type to be used for the EVL/AVL operand of VP nodes:
440 /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type,
441 /// and must be at least as large as i32. The EVL is implicitly zero-extended
442 /// to any larger type.
443 virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; }
444
445 /// This callback is used to inspect load/store instructions and add
446 /// target-specific MachineMemOperand flags to them. The default
447 /// implementation does nothing.
448 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
449 return MachineMemOperand::MONone;
450 }
451
452 /// This callback is used to inspect load/store SDNode.
453 /// The default implementation does nothing.
454 virtual MachineMemOperand::Flags
455 getTargetMMOFlags(const MemSDNode &Node) const {
456 return MachineMemOperand::MONone;
457 }
458
459 MachineMemOperand::Flags
460 getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
461 AssumptionCache *AC = nullptr,
462 const TargetLibraryInfo *LibInfo = nullptr) const;
463 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
464 const DataLayout &DL) const;
465 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
466 const DataLayout &DL) const;
467
468 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
469 return true;
470 }
471
472 /// Return true if the @llvm.experimental.vector.partial.reduce.* intrinsic
473 /// should be expanded using generic code in SelectionDAGBuilder.
474 virtual bool
475 shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const {
476 return true;
477 }
478
479 /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
480 /// using generic code in SelectionDAGBuilder.
481 virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
482 return true;
483 }
484
485 virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
486 bool IsScalable) const {
487 return true;
488 }
489
490 /// Return true if the @llvm.experimental.cttz.elts intrinsic should be
491 /// expanded using generic code in SelectionDAGBuilder.
492 virtual bool shouldExpandCttzElements(EVT VT) const { return true; }
493
494 /// Return the minimum number of bits required to hold the maximum possible
495 /// number of trailing zero vector elements.
496 unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC,
497 bool ZeroIsPoison,
498 const ConstantRange *VScaleRange) const;
499
500 /// Return true if the @llvm.experimental.vector.match intrinsic should be
501 /// expanded for vector type `VT' and search size `SearchSize' using generic
502 /// code in SelectionDAGBuilder.
503 virtual bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const {
504 return true;
505 }
506
507 // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
508 // vecreduce(op(x, y)) for the reduction opcode RedOpc.
509 virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
510 return true;
511 }
512
513 /// Return true if it is profitable to convert a select of FP constants into
514 /// a constant pool load whose address depends on the select condition. The
515 /// parameter may be used to differentiate a select with FP compare from
516 /// integer compare.
517 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
518 return true;
519 }
520
521 /// Return true if multiple condition registers are available.
522 bool hasMultipleConditionRegisters() const {
523 return HasMultipleConditionRegisters;
524 }
525
526 /// Return true if the target has BitExtract instructions.
527 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
528
529 /// Return the preferred vector type legalization action.
530 virtual TargetLoweringBase::LegalizeTypeAction
531 getPreferredVectorAction(MVT VT) const {
    // The default action for one-element vectors is to scalarize.
    if (VT.getVectorElementCount().isScalar())
      return TypeScalarizeVector;
    // The default action for an odd-width vector is to widen.
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote.
    return TypePromoteInteger;
540 }
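
  // A hypothetical out-of-tree override (MyTargetLowering is an invented
  // subclass name) that widens all small fixed-width vectors instead of
  // promoting them:
  //
  //   TargetLoweringBase::LegalizeTypeAction
  //   MyTargetLowering::getPreferredVectorAction(MVT VT) const {
  //     if (!VT.isScalableVector() && VT.getFixedSizeInBits() < 128)
  //       return TypeWidenVector;
  //     return TargetLoweringBase::getPreferredVectorAction(VT);
  //   }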
541
542 // Return true if the half type should be promoted using soft promotion rules
543 // where each operation is promoted to f32 individually, then converted to
544 // fp16. The default behavior is to promote chains of operations, keeping
545 // intermediate results in f32 precision and range.
546 virtual bool softPromoteHalfType() const { return false; }
547
  // Return true if, for soft-promoted half, the half type should be passed
  // to and returned from functions as f32. The default behavior is to pass
  // as i16. If soft-promoted half is not used, this function is ignored and
  // values are always passed and returned as f32.
552 virtual bool useFPRegsForHalfType() const { return false; }
553
554 // There are two general methods for expanding a BUILD_VECTOR node:
555 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
556 // them together.
557 // 2. Build the vector on the stack and then load it.
558 // If this function returns true, then method (1) will be used, subject to
559 // the constraint that all of the necessary shuffles are legal (as determined
560 // by isShuffleMaskLegal). If this function returns false, then method (2) is
561 // always used. The vector type, and the number of defined values, are
562 // provided.
563 virtual bool
564 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
565 unsigned DefinedValues) const {
566 return DefinedValues < 3;
567 }
568
569 /// Return true if integer divide is usually cheaper than a sequence of
570 /// several shifts, adds, and multiplies for this target.
571 /// The definition of "cheaper" may depend on whether we're optimizing
572 /// for speed or for size.
573 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
574
575 /// Return true if the target can handle a standalone remainder operation.
576 virtual bool hasStandaloneRem(EVT VT) const {
577 return true;
578 }
579
580 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
581 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
582 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
583 return false;
584 }
585
586 /// Reciprocal estimate status values used by the functions below.
587 enum ReciprocalEstimate : int {
588 Unspecified = -1,
589 Disabled = 0,
590 Enabled = 1
591 };
592
593 /// Return a ReciprocalEstimate enum value for a square root of the given type
594 /// based on the function's attributes. If the operation is not overridden by
595 /// the function's attributes, "Unspecified" is returned and target defaults
596 /// are expected to be used for instruction selection.
597 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
598
599 /// Return a ReciprocalEstimate enum value for a division of the given type
600 /// based on the function's attributes. If the operation is not overridden by
601 /// the function's attributes, "Unspecified" is returned and target defaults
602 /// are expected to be used for instruction selection.
603 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
604
605 /// Return the refinement step count for a square root of the given type based
606 /// on the function's attributes. If the operation is not overridden by
607 /// the function's attributes, "Unspecified" is returned and target defaults
608 /// are expected to be used for instruction selection.
609 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
610
611 /// Return the refinement step count for a division of the given type based
612 /// on the function's attributes. If the operation is not overridden by
613 /// the function's attributes, "Unspecified" is returned and target defaults
614 /// are expected to be used for instruction selection.
615 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
616
617 /// Returns true if target has indicated at least one type should be bypassed.
618 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
619
  /// Returns the map of slow types for division or remainder with their
  /// corresponding fast types.
622 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
623 return BypassSlowDivWidths;
624 }
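
  // For example (illustrative), a target whose 64-bit divider is much slower
  // than its 32-bit one can register a bypass in its TargetLowering
  // constructor (addBypassSlowDiv is declared later in this class), which
  // populates this map with the pair {64, 32}:
  //
  //   addBypassSlowDiv(64, 32);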
625
626 /// Return true only if vscale must be a power of two.
627 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
628
629 /// Return true if Flow Control is an expensive operation that should be
630 /// avoided.
631 bool isJumpExpensive() const { return JumpIsExpensive; }
632
633 // Costs parameters used by
634 // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
635 // shouldKeepJumpConditionsTogether will use these parameter value to
636 // determine if two conditions in the form `br (and/or cond1, cond2)` should
637 // be split into two branches or left as one.
638 //
639 // BaseCost is the cost threshold (in latency). If the estimated latency of
640 // computing both `cond1` and `cond2` is below the cost of just computing
641 // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
642 // they will be split.
643 //
644 // LikelyBias increases BaseCost if branch probability info indicates that it
645 // is likely that both `cond1` and `cond2` will be computed.
646 //
647 // UnlikelyBias decreases BaseCost if branch probability info indicates that
  // it is unlikely that both `cond1` and `cond2` will be computed.
649 //
650 // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
651 // `shouldKeepJumpConditionsTogether` always returning false).
652 struct CondMergingParams {
653 int BaseCost;
654 int LikelyBias;
655 int UnlikelyBias;
656 };
657 // Return params for deciding if we should keep two branch conditions merged
658 // or split them into two separate branches.
659 // Arg0: The binary op joining the two conditions (and/or).
660 // Arg1: The first condition (cond1)
661 // Arg2: The second condition (cond2)
662 virtual CondMergingParams
663 getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
664 const Value *) const {
665 // -1 will always result in splitting.
    return {-1, -1, -1};
667 }
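
  // A hypothetical override (the numbers are illustrative, not tuned) that
  // merges short 'and' chains but always splits 'or' chains:
  //
  //   TargetLoweringBase::CondMergingParams
  //   MyTargetLowering::getJumpConditionMergingParams(
  //       Instruction::BinaryOps Opc, const Value *, const Value *) const {
  //     if (Opc == Instruction::And)
  //       return {/*BaseCost=*/2, /*LikelyBias=*/1, /*UnlikelyBias=*/0};
  //     return {-1, -1, -1}; // BaseCost of -1 always splits.
  //   }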
668
669 /// Return true if selects are only cheaper than branches if the branch is
670 /// unlikely to be predicted right.
671 bool isPredictableSelectExpensive() const {
672 return PredictableSelectIsExpensive;
673 }
674
675 virtual bool fallBackToDAGISel(const Instruction &Inst) const {
676 return false;
677 }
678
679 /// Return true if the following transform is beneficial:
680 /// fold (conv (load x)) -> (load (conv*)x)
681 /// On architectures that don't natively support some vector loads
682 /// efficiently, casting the load to a smaller vector of larger types and
  /// loading is more efficient; however, this can be undone by optimizations
  /// in the DAG combiner.
685 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
686 const SelectionDAG &DAG,
687 const MachineMemOperand &MMO) const;
688
689 /// Return true if the following transform is beneficial:
690 /// (store (y (conv x)), y*)) -> (store x, (x*))
691 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
692 const SelectionDAG &DAG,
693 const MachineMemOperand &MMO) const {
694 // Default to the same logic as loads.
    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
696 }
697
698 /// Return true if it is expected to be cheaper to do a store of vector
699 /// constant with the given size and type for the address space than to
700 /// store the individual scalar element constants.
701 virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
702 unsigned NumElem,
703 unsigned AddrSpace) const {
704 return IsZero;
705 }
706
707 /// Allow store merging for the specified type after legalization in addition
708 /// to before legalization. This may transform stores that do not exist
709 /// earlier (for example, stores created from intrinsics).
710 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
711 return true;
712 }
713
  /// Returns true if it's reasonable to merge stores to MemVT size.
715 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
716 const MachineFunction &MF) const {
717 return true;
718 }
719
720 /// Return true if it is cheap to speculate a call to intrinsic cttz.
721 virtual bool isCheapToSpeculateCttz(Type *Ty) const {
722 return false;
723 }
724
725 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
726 virtual bool isCheapToSpeculateCtlz(Type *Ty) const {
727 return false;
728 }
729
730 /// Return true if ctlz instruction is fast.
731 virtual bool isCtlzFast() const {
732 return false;
733 }
734
735 /// Return true if ctpop instruction is fast.
736 virtual bool isCtpopFast(EVT VT) const {
    return isOperationLegal(ISD::CTPOP, VT);
738 }
739
740 /// Return the maximum number of "x & (x - 1)" operations that can be done
741 /// instead of deferring to a custom CTPOP.
742 virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const {
743 return 1;
744 }
745
746 /// Return true if instruction generated for equality comparison is folded
747 /// with instruction generated for signed comparison.
748 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
749
750 /// Return true if the heuristic to prefer icmp eq zero should be used in code
751 /// gen prepare.
752 virtual bool preferZeroCompareBranch() const { return false; }
753
754 /// Return true if it is cheaper to split the store of a merged int val
755 /// from a pair of smaller values into multiple stores.
756 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
757 return false;
758 }
759
  /// Return true if the target supports combining a
  /// chain like:
762 /// \code
763 /// %andResult = and %val1, #mask
764 /// %icmpResult = icmp %andResult, 0
765 /// \endcode
766 /// into a single machine instruction of a form like:
767 /// \code
768 /// cc = test %register, #mask
769 /// \endcode
770 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
771 return false;
772 }
773
774 /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
775 virtual bool
776 areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
777 const MemSDNode &NodeY) const {
778 return true;
779 }
780
781 /// Use bitwise logic to make pairs of compares more efficient. For example:
782 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
783 /// This should be true when it takes more than one instruction to lower
784 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
785 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
786 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
787 return false;
788 }
789
790 /// Return the preferred operand type if the target has a quick way to compare
791 /// integer values of the given size. Assume that any legal integer type can
792 /// be compared efficiently. Targets may override this to allow illegal wide
793 /// types to return a vector type if there is support to compare that type.
794 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
    MVT VT = MVT::getIntegerVT(NumBits);
796 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
797 }
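
  // A hypothetical override: a target with fast 256-bit vector equality could
  // allow wide-compare expansions to work 256 bits at a time (hasFast256Cmp is
  // an invented subtarget predicate):
  //
  //   MVT MyTargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
  //     if (NumBits == 256 && Subtarget.hasFast256Cmp())
  //       return MVT::v32i8;
  //     return TargetLoweringBase::hasFastEqualityCompare(NumBits);
  //   }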
798
799 /// Return true if the target should transform:
800 /// (X & Y) == Y ---> (~X & Y) == 0
801 /// (X & Y) != Y ---> (~X & Y) != 0
802 ///
803 /// This may be profitable if the target has a bitwise and-not operation that
804 /// sets comparison flags. A target may want to limit the transformation based
805 /// on the type of Y or if Y is a constant.
806 ///
807 /// Note that the transform will not occur if Y is known to be a power-of-2
808 /// because a mask and compare of a single bit can be handled by inverting the
809 /// predicate, for example:
810 /// (X & 8) == 8 ---> (X & 8) != 0
811 virtual bool hasAndNotCompare(SDValue Y) const {
812 return false;
813 }
814
815 /// Return true if the target has a bitwise and-not operation:
816 /// X = ~A & B
817 /// This can be used to simplify select or other instructions.
818 virtual bool hasAndNot(SDValue X) const {
819 // If the target has the more complex version of this operation, assume that
820 // it has this operation too.
    return hasAndNotCompare(X);
822 }
823
824 /// Return true if the target has a bit-test instruction:
825 /// (X & (1 << Y)) ==/!= 0
826 /// This knowledge can be used to prevent breaking the pattern,
827 /// or creating it if it could be recognized.
828 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
829
830 /// There are two ways to clear extreme bits (either low or high):
831 /// Mask: x & (-1 << y) (the instcombine canonical form)
832 /// Shifts: x >> y << y
833 /// Return true if the variant with 2 variable shifts is preferred.
834 /// Return false if there is no preference.
835 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
836 // By default, let's assume that no one prefers shifts.
837 return false;
838 }
839
840 /// Return true if it is profitable to fold a pair of shifts into a mask.
841 /// This is usually true on most targets. But some targets, like Thumb1,
842 /// have immediate shift instructions, but no immediate "and" instruction;
843 /// this makes the fold unprofitable.
844 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
845 CombineLevel Level) const {
846 return true;
847 }
848
  /// Should we transform the IR-optimal check for whether the given truncation
  /// down into KeptBits would be truncating or not:
  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// into its more traditional form:
  ///   ((%x << C) a>> C) dstcond %x
854 /// Return true if we should transform.
855 /// Return false if there is no preference.
856 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
857 unsigned KeptBits) const {
858 // By default, let's assume that no one prefers shifts.
859 return false;
860 }
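
  // Worked example: for %x : i32 and KeptBits == 8, "does %x survive
  // truncation to i8" can be checked either way (C == 32 - KeptBits == 24):
  //
  //   (add %x, 128) u< 256            ; IR-optimal form
  //   ((%x << 24) a>> 24) == %x       ; shift form, used if this returns true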
861
862 /// Given the pattern
863 /// (X & (C l>>/<< Y)) ==/!= 0
864 /// return true if it should be transformed into:
865 /// ((X <</l>> Y) & C) ==/!= 0
866 /// WARNING: if 'X' is a constant, the fold may deadlock!
867 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
868 /// here because it can end up being not linked in.
869 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
870 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
871 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
872 SelectionDAG &DAG) const {
873 if (hasBitTest(X, Y)) {
874 // One interesting pattern that we'd want to form is 'bit test':
875 // ((1 << Y) & C) ==/!= 0
876 // But we also need to be careful not to try to reverse that fold.
877
878 // Is this '1 << Y' ?
879 if (OldShiftOpcode == ISD::SHL && CC->isOne())
880 return false; // Keep the 'bit test' pattern.
881
882 // Will it be '1 << Y' after the transform ?
883 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
884 return true; // Do form the 'bit test' pattern.
885 }
886
887 // If 'X' is a constant, and we transform, then we will immediately
888 // try to undo the fold, thus causing endless combine loop.
889 // So by default, let's assume everyone prefers the fold
890 // iff 'X' is not a constant.
891 return !XC;
892 }
893
  // Return true if it is desirable to perform the following transform:
895 // (fmul C, (uitofp Pow2))
896 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
897 // (fdiv C, (uitofp Pow2))
898 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
899 //
  // This is only queried after we have verified that the transform is
  // bitwise equivalent.
902 //
903 // SDNode *N : The FDiv/FMul node we want to transform.
904 // SDValue FPConst: The Float constant operand in `N`.
905 // SDValue IntPow2: The Integer power of 2 operand in `N`.
906 virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
907 SDValue IntPow2) const {
908 // Default to avoiding fdiv which is often very expensive.
909 return N->getOpcode() == ISD::FDIV;
910 }
911
912 // Given:
913 // (icmp eq/ne (and X, C0), (shift X, C1))
914 // or
915 // (icmp eq/ne X, (rotate X, CPow2))
916
  // If C0 is a mask or shifted mask and the shift amount (C1) isolates the
  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
  // do we prefer the shift to be shift-right, shift-left, or rotate?
  // Note: It is only valid to convert the rotate version to the shift version
  // iff the shift amount (`C1`) is a power of 2 (including 0).
  // If ShiftOpc (the current opcode) is returned, do nothing.
923 virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
924 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
925 const APInt &ShiftOrRotateAmt,
926 const std::optional<APInt> &AndMask) const {
927 return ShiftOpc;
928 }
929
930 /// These two forms are equivalent:
931 /// sub %y, (xor %x, -1)
932 /// add (add %x, 1), %y
933 /// The variant with two add's is IR-canonical.
934 /// Some targets may prefer one to the other.
935 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
936 // By default, let's assume that everyone prefers the form with two add's.
937 return true;
938 }
939
940 // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
941 // may want to avoid this to prevent loss of sub_nsw pattern.
942 virtual bool preferABDSToABSWithNSW(EVT VT) const {
943 return true;
944 }
945
946 // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
947 virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
948
949 // Return true if the target wants to transform:
  // (TruncVT truncate(sext_in_reg(VT X, ExtVT)))
951 // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT))
952 // Some targets might prefer pre-sextinreg to improve truncation/saturation.
953 virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const {
954 return true;
955 }
956
957 /// Return true if the target wants to use the optimization that
958 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
959 /// promotedInst1(...(promotedInstN(ext(load)))).
960 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
961
962 /// Return true if the target can combine store(extractelement VectorTy,
963 /// Idx).
964 /// \p Cost[out] gives the cost of that transformation when this is true.
965 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
966 unsigned &Cost) const {
967 return false;
968 }
969
970 /// Return true if the target shall perform extract vector element and store
971 /// given that the vector is known to be splat of constant.
972 /// \p Index[out] gives the index of the vector element to be extracted when
973 /// this is true.
974 virtual bool shallExtractConstSplatVectorElementToStore(
975 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
976 return false;
977 }
978
979 /// Return true if inserting a scalar into a variable element of an undef
980 /// vector is more efficiently handled by splatting the scalar instead.
981 virtual bool shouldSplatInsEltVarIndex(EVT) const {
982 return false;
983 }
984
985 /// Return true if target always benefits from combining into FMA for a
986 /// given value type. This must typically return false on targets where FMA
987 /// takes more cycles to execute than FADD.
988 virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
989
990 /// Return true if target always benefits from combining into FMA for a
991 /// given value type. This must typically return false on targets where FMA
992 /// takes more cycles to execute than FADD.
993 virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
994
995 /// Return the ValueType of the result of SETCC operations.
996 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
997 EVT VT) const;
998
999 /// Return the ValueType for comparison libcalls. Comparison libcalls include
1000 /// floating point comparison calls, and Ordered/Unordered check calls on
1001 /// floating point numbers.
1002 virtual
1003 MVT::SimpleValueType getCmpLibcallReturnType() const;
1004
1005 /// For targets without i1 registers, this gives the nature of the high-bits
1006 /// of boolean values held in types wider than i1.
1007 ///
1008 /// "Boolean values" are special true/false values produced by nodes like
1009 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
1010 /// Not to be confused with general values promoted from i1. Some cpus
1011 /// distinguish between vectors of boolean and scalars; the isVec parameter
1012 /// selects between the two kinds. For example on X86 a scalar boolean should
1013 /// be zero extended from i1, while the elements of a vector of booleans
1014 /// should be sign extended from i1.
1015 ///
1016 /// Some cpus also treat floating point types the same way as they treat
1017 /// vectors instead of the way they treat scalars.
1018 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
1019 if (isVec)
1020 return BooleanVectorContents;
1021 return isFloat ? BooleanFloatContents : BooleanContents;
1022 }
1023
1024 BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
1026 }
1027
1028 /// Promote the given target boolean to a target boolean of the given type.
1029 /// A target boolean is an integer value, not necessarily of type i1, the bits
1030 /// of which conform to getBooleanContents.
1031 ///
1032 /// ValVT is the type of values that produced the boolean.
1033 SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool,
1034 EVT ValVT) const {
1035 SDLoc dl(Bool);
    EVT BoolVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ValVT);
    ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(ValVT));
    return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
1040 }
1041
1042 /// Return target scheduling preference.
1043 Sched::Preference getSchedulingPreference() const {
1044 return SchedPreferenceInfo;
1045 }
1046
  /// Some schedulers, e.g. hybrid, can switch to different scheduling
  /// heuristics for different nodes. This function returns the preference
  /// (or none) for the given node.
1050 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
1051 return Sched::None;
1052 }
1053
1054 /// Return the register class that should be used for the specified value
1055 /// type.
1056 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
1057 (void)isDivergent;
1058 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
1059 assert(RC && "This value type is not natively supported!");
1060 return RC;
1061 }
1062
1063 /// Allows target to decide about the register class of the
1064 /// specific value that is live outside the defining block.
1065 /// Returns true if the value needs uniform register class.
1066 virtual bool requiresUniformRegister(MachineFunction &MF,
1067 const Value *) const {
1068 return false;
1069 }
1070
1071 /// Return the 'representative' register class for the specified value
1072 /// type.
1073 ///
1074 /// The 'representative' register class is the largest legal super-reg
1075 /// register class for the register class of the value type. For example, on
1076 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
1077 /// register class is GR64 on x86_64.
1078 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
1079 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
1080 return RC;
1081 }
1082
1083 /// Return the cost of the 'representative' register class for the specified
1084 /// value type.
1085 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
1086 return RepRegClassCostForVT[VT.SimpleTy];
1087 }
1088
  /// Return the preferred strategy to legalize this SHIFT instruction, with
  /// \p ExpansionFactor being the recursion depth - how many expansions are
  /// needed.
1091 enum class ShiftLegalizationStrategy {
1092 ExpandToParts,
1093 ExpandThroughStack,
1094 LowerToLibcall
1095 };
1096 virtual ShiftLegalizationStrategy
1097 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1098 unsigned ExpansionFactor) const {
1099 if (ExpansionFactor == 1)
1100 return ShiftLegalizationStrategy::ExpandToParts;
1101 return ShiftLegalizationStrategy::ExpandThroughStack;
1102 }
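
  // A hypothetical override for a target without efficient multi-word shifts,
  // falling back to a libcall once the expansion would recurse:
  //
  //   TargetLoweringBase::ShiftLegalizationStrategy
  //   MyTargetLowering::preferredShiftLegalizationStrategy(
  //       SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
  //     return ExpansionFactor > 1 ? ShiftLegalizationStrategy::LowerToLibcall
  //                                : ShiftLegalizationStrategy::ExpandToParts;
  //   }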
1103
1104 /// Return true if the target has native support for the specified value type.
1105 /// This means that it has a register that directly holds it without
1106 /// promotions or expansions.
1107 bool isTypeLegal(EVT VT) const {
1108 assert(!VT.isSimple() ||
1109 (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT));
1110 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
1111 }
1112
1113 class ValueTypeActionImpl {
1114 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
1115 /// that indicates how instruction selection should deal with the type.
1116 LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE];
1117
1118 public:
1119 ValueTypeActionImpl() {
      std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
                TypeLegal);
1122 }
1123
1124 LegalizeTypeAction getTypeAction(MVT VT) const {
1125 return ValueTypeActions[VT.SimpleTy];
1126 }
1127
1128 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
1129 ValueTypeActions[VT.SimpleTy] = Action;
1130 }
1131 };
1132
1133 const ValueTypeActionImpl &getValueTypeActions() const {
1134 return ValueTypeActions;
1135 }
1136
1137 /// Return pair that represents the legalization kind (first) that needs to
1138 /// happen to EVT (second) in order to type-legalize it.
1139 ///
1140 /// First: how we should legalize values of this type, either it is already
1141 /// legal (return 'Legal') or we need to promote it to a larger type (return
1142 /// 'Promote'), or we need to expand it into multiple registers of smaller
1143 /// integer type (return 'Expand'). 'Custom' is not an option.
1144 ///
1145 /// Second: for types supported by the target, this is an identity function.
1146 /// For types that must be promoted to larger types, this returns the larger
1147 /// type to promote to. For integer types that are larger than the largest
1148 /// integer register, this contains one step in the expansion to get to the
1149 /// smaller register. For illegal floating point types, this returns the
1150 /// integer type to transform to.
1151 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
1152
1153 /// Return how we should legalize values of this type, either it is already
1154 /// legal (return 'Legal') or we need to promote it to a larger type (return
1155 /// 'Promote'), or we need to expand it into multiple registers of smaller
1156 /// integer type (return 'Expand'). 'Custom' is not an option.
1157 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
1158 return getTypeConversion(Context, VT).first;
1159 }
1160 LegalizeTypeAction getTypeAction(MVT VT) const {
1161 return ValueTypeActions.getTypeAction(VT);
1162 }
1163
1164 /// For types supported by the target, this is an identity function. For
1165 /// types that must be promoted to larger types, this returns the larger type
1166 /// to promote to. For integer types that are larger than the largest integer
1167 /// register, this contains one step in the expansion to get to the smaller
1168 /// register. For illegal floating point types, this returns the integer type
1169 /// to transform to.
1170 virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
1171 return getTypeConversion(Context, VT).second;
1172 }
1173
1174 /// For types supported by the target, this is an identity function. For
1175 /// types that must be expanded (i.e. integer types that are larger than the
1176 /// largest integer register or illegal floating point types), this returns
1177 /// the largest legal type it will be expanded to.
1178 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
1179 assert(!VT.isVector());
1180 while (true) {
1181 switch (getTypeAction(Context, VT)) {
1182 case TypeLegal:
1183 return VT;
1184 case TypeExpandInteger:
1185 VT = getTypeToTransformTo(Context, VT);
1186 break;
1187 default:
1188 llvm_unreachable("Type is not legal nor is it to be expanded!");
1189 }
1190 }
1191 }
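
  // For example, on a target whose widest legal integer type is i32, i128 is
  // TypeExpandInteger and the loop above walks i128 -> i64 -> i32, so:
  //
  //   EVT VT = TLI.getTypeToExpandTo(Ctx, MVT::i128); // VT == MVT::i32
  //
  // (TLI and Ctx stand in for a TargetLowering reference and an LLVMContext.)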
1192
1193 /// Vector types are broken down into some number of legal first class types.
1194 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
1195 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
1196 /// turns into 4 EVT::i32 values with both PPC and X86.
1197 ///
1198 /// This method returns the number of registers needed, and the VT for each
1199 /// register. It also returns the VT and quantity of the intermediate values
1200 /// before they are promoted/expanded.
1201 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
1202 EVT &IntermediateVT,
1203 unsigned &NumIntermediates,
1204 MVT &RegisterVT) const;
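
  // Illustrative use (an SSE1-like target where v4f32 is the widest legal
  // vector type): v8f32 is split into two v4f32 registers.
  //
  //   EVT IntermediateVT;
  //   MVT RegisterVT;
  //   unsigned NumIntermediates;
  //   unsigned NumRegs = TLI.getVectorTypeBreakdown(
  //       Ctx, MVT::v8f32, IntermediateVT, NumIntermediates, RegisterVT);
  //   // NumRegs == 2, NumIntermediates == 2,
  //   // IntermediateVT == v4f32, RegisterVT == v4f32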
1205
1206 /// Certain targets such as MIPS require that some types such as vectors are
1207 /// always broken down into scalars in some contexts. This occurs even if the
1208 /// vector type is legal.
1209 virtual unsigned getVectorTypeBreakdownForCallingConv(
1210 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1211 unsigned &NumIntermediates, MVT &RegisterVT) const {
1212 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
1213 RegisterVT);
1214 }
1215
1216 struct IntrinsicInfo {
1217 unsigned opc = 0; // target opcode
1218 EVT memVT; // memory VT
1219
1220 // value representing memory location
1221 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
1222
1223 // Fallback address space for use if ptrVal is nullptr. std::nullopt means
1224 // unknown address space.
1225 std::optional<unsigned> fallbackAddressSpace;
1226
1227 int offset = 0; // offset off of ptrVal
1228 uint64_t size = 0; // the size of the memory location
1229 // (taken from memVT if zero)
1230 MaybeAlign align = Align(1); // alignment
1231
1232 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
1233 SyncScope::ID ssid = SyncScope::System;
1234 AtomicOrdering order = AtomicOrdering::NotAtomic;
1235 AtomicOrdering failureOrder = AtomicOrdering::NotAtomic;
1236 IntrinsicInfo() = default;
1237 };
1238
1239 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1240 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
  /// true and stores the intrinsic information into the IntrinsicInfo that was
1242 /// passed to the function.
1243 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
1244 MachineFunction &,
1245 unsigned /*Intrinsic*/) const {
1246 return false;
1247 }
1248
1249 /// Returns true if the target can instruction select the specified FP
1250 /// immediate natively. If false, the legalizer will materialize the FP
1251 /// immediate as a load from a constant pool.
1252 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
1253 bool ForCodeSize = false) const {
1254 return false;
1255 }
1256
1257 /// Targets can use this to indicate that they only support *some*
1258 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1259 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
1260 /// legal.
1261 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
1262 return true;
1263 }
1264
1265 /// Returns true if the operation can trap for the value type.
1266 ///
1267 /// VT must be a legal type. By default, we optimistically assume most
1268 /// operations don't trap except for integer divide and remainder.
1269 virtual bool canOpTrap(unsigned Op, EVT VT) const;
1270
1271 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1272 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1273 /// constant pool entry.
1274 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
1275 EVT /*VT*/) const {
1276 return false;
1277 }
1278
1279 /// How to legalize this custom operation?
1280 virtual LegalizeAction getCustomOperationAction(SDNode &Op) const {
1281 return Legal;
1282 }
1283
1284 /// Return how this operation should be treated: either it is legal, needs to
1285 /// be promoted to a larger size, needs to be expanded to some other code
1286 /// sequence, or the target has a custom expander for it.
1287 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
1288 // If a target-specific SDNode requires legalization, require the target
1289 // to provide custom legalization for it.
1290 if (Op >= std::size(OpActions[0]))
1291 return Custom;
1292 if (VT.isExtended())
1293 return Expand;
1294 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
1295 }
1296
1297 /// Custom method defined by each target to indicate if an operation which
1298 /// may require a scale is supported natively by the target.
1299 /// If not, the operation is illegal.
1300 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
1301 unsigned Scale) const {
1302 return false;
1303 }
1304
1305 /// Some fixed point operations may be natively supported by the target but
1306 /// only for specific scales. This method allows for checking
1307 /// if the width is supported by the target for a given operation that may
1308 /// depend on scale.
1309 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
1310 unsigned Scale) const {
1311 auto Action = getOperationAction(Op, VT);
1312 if (Action != Legal)
1313 return Action;
1314
1315 // This operation is supported in this type but may only work on specific
1316 // scales.
1317 bool Supported;
1318 switch (Op) {
1319 default:
1320 llvm_unreachable("Unexpected fixed point operation.");
1321 case ISD::SMULFIX:
1322 case ISD::SMULFIXSAT:
1323 case ISD::UMULFIX:
1324 case ISD::UMULFIXSAT:
1325 case ISD::SDIVFIX:
1326 case ISD::SDIVFIXSAT:
1327 case ISD::UDIVFIX:
1328 case ISD::UDIVFIXSAT:
1329 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
1330 break;
1331 }
1332
1333 return Supported ? Action : Expand;
1334 }
1335
1336 // If Op is a strict floating-point operation, return the result
1337 // of getOperationAction for the equivalent non-strict operation.
1338 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
1339 unsigned EqOpc;
1340 switch (Op) {
1341 default: llvm_unreachable("Unexpected FP pseudo-opcode");
1342#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1343 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
1344#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1345 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
1346#include "llvm/IR/ConstrainedOps.def"
1347 }
1348
    return getOperationAction(EqOpc, VT);
1350 }
1351
1352 /// Return true if the specified operation is legal on this target or can be
1353 /// made legal with custom lowering. This is used to help guide high-level
1354 /// lowering decisions. LegalOnly is an optional convenience for code paths
1355 /// traversed pre and post legalisation.
1356 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
1357 bool LegalOnly = false) const {
1358 if (LegalOnly)
1359 return isOperationLegal(Op, VT);
1360
1361 return (VT == MVT::Other || isTypeLegal(VT)) &&
1362 (getOperationAction(Op, VT) == Legal ||
1363 getOperationAction(Op, VT) == Custom);
1364 }
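
  // Typical use in a DAG combine: bail out before creating a node the target
  // cannot handle, e.g.
  //
  //   if (!TLI.isOperationLegalOrCustom(ISD::ABDS, VT))
  //     return SDValue();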
1365
1366 /// Return true if the specified operation is legal on this target or can be
1367 /// made legal using promotion. This is used to help guide high-level lowering
1368 /// decisions. LegalOnly is an optional convenience for code paths traversed
1369 /// pre and post legalisation.
1370 bool isOperationLegalOrPromote(unsigned Op, EVT VT,
1371 bool LegalOnly = false) const {
1372 if (LegalOnly)
1373 return isOperationLegal(Op, VT);
1374
1375 return (VT == MVT::Other || isTypeLegal(VT)) &&
1376 (getOperationAction(Op, VT) == Legal ||
1377 getOperationAction(Op, VT) == Promote);
1378 }
1379
1380 /// Return true if the specified operation is legal on this target or can be
1381 /// made legal with custom lowering or using promotion. This is used to help
1382 /// guide high-level lowering decisions. LegalOnly is an optional convenience
1383 /// for code paths traversed pre and post legalisation.
1384 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
1385 bool LegalOnly = false) const {
1386 if (LegalOnly)
1387 return isOperationLegal(Op, VT);
1388
1389 return (VT == MVT::Other || isTypeLegal(VT)) &&
1390 (getOperationAction(Op, VT) == Legal ||
1391 getOperationAction(Op, VT) == Custom ||
1392 getOperationAction(Op, VT) == Promote);
1393 }
1394
1395 /// Return true if the operation uses custom lowering, regardless of whether
1396 /// the type is legal or not.
1397 bool isOperationCustom(unsigned Op, EVT VT) const {
1398 return getOperationAction(Op, VT) == Custom;
1399 }
1400
1401 /// Return true if lowering to a jump table is allowed.
1402 virtual bool areJTsAllowed(const Function *Fn) const {
    if (Fn->getFnAttribute("no-jump-tables").getValueAsBool())
1404 return false;
1405
1406 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1407 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1408 }
1409
1410 /// Check whether the range [Low,High] fits in a machine word.
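  /// For example, with a 64-bit index type, [Low, Low + 63] spans 64 values
  /// and fits, while [Low, Low + 64] does not.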
1411 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1412 const DataLayout &DL) const {
1413 // FIXME: Using the pointer type doesn't seem ideal.
    uint64_t BW = DL.getIndexSizeInBits(0u);
1415 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1416 return Range <= BW;
1417 }
1418
1419 /// Return true if lowering to a jump table is suitable for a set of case
  /// clusters which may contain \p NumCases cases and span a range of
  /// \p Range values.
1421 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1422 uint64_t Range, ProfileSummaryInfo *PSI,
1423 BlockFrequencyInfo *BFI) const;
1424
1425 /// Returns preferred type for switch condition.
1426 virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
1427 EVT ConditionVT) const;
1428
1429 /// Return true if lowering to a bit test is suitable for a set of case
1430 /// clusters which contains \p NumDests unique destinations, \p Low and
1431 /// \p High as its lowest and highest case values, and expects \p NumCmps
1432 /// case value comparisons. Check if the number of destinations, comparison
1433 /// metric, and range are all suitable.
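  /// For example, with a 64-bit machine word, a switch with 3 unique
  /// destinations reached by 6 case values whose range fits in the word is
  /// considered suitable, whereas the same destinations reached by only 4
  /// case values are not.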
1434 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1435 const APInt &Low, const APInt &High,
1436 const DataLayout &DL) const {
1437 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1438 // range of cases both require only one branch to lower. Just looking at the
1439 // number of clusters and destinations should be enough to decide whether to
1440 // build bit tests.
1441
1442 // To lower a range with bit tests, the range must fit the bitwidth of a
1443 // machine word.
1444 if (!rangeFitsInWord(Low, High, DL))
1445 return false;
1446
1447 // Decide whether it's profitable to lower this range with bit tests. Each
1448 // destination requires a bit test and branch, and there is an overall range
1449 // check branch. For a small number of clusters, separate comparisons might
1450 // be cheaper, and for many destinations, splitting the range might be
1451 // better.
1452 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1453 (NumDests == 3 && NumCmps >= 6);
1454 }
1455
1456 /// Return true if the specified operation is illegal on this target or
1457 /// unlikely to be made legal with custom lowering. This is used to help guide
1458 /// high-level lowering decisions.
1459 bool isOperationExpand(unsigned Op, EVT VT) const {
1460 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1461 }
1462
1463 /// Return true if the specified operation is legal on this target.
1464 bool isOperationLegal(unsigned Op, EVT VT) const {
1465 return (VT == MVT::Other || isTypeLegal(VT)) &&
1466 getOperationAction(Op, VT) == Legal;
1467 }
1468
1469 /// Return how this load with extension should be treated: either it is legal,
1470 /// needs to be promoted to a larger size, needs to be expanded to some other
1471 /// code sequence, or the target has a custom expander for it.
1472 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1473 EVT MemVT) const {
1474 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1475 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1476 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1477 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1478 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1479 unsigned Shift = 4 * ExtType;
1480 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1481 }
1482
1483 /// Return true if the specified load with extension is legal on this target.
1484 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1485 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1486 }
1487
1488 /// Return true if the specified load with extension is legal or custom
1489 /// on this target.
1490 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1491 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1492 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1493 }
1494
1495 /// Same as getLoadExtAction, but for atomic loads.
1496 LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT,
1497 EVT MemVT) const {
1498 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1499 unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy;
1500 unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy;
1501 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1502 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1503 unsigned Shift = 4 * ExtType;
1504 LegalizeAction Action =
1505 (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf);
1506 assert((Action == Legal || Action == Expand) &&
1507 "Unsupported atomic load extension action.");
1508 return Action;
1509 }
1510
1511 /// Return true if the specified atomic load with extension is legal on
1512 /// this target.
1513 bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1514 return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1515 }
1516
1517 /// Return how this store with truncation should be treated: either it is
1518 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1519 /// other code sequence, or the target has a custom expander for it.
1520 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1521 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1522 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1523 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1524 assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE &&
1525 "Table isn't big enough!");
1526 return TruncStoreActions[ValI][MemI];
1527 }
1528
1529 /// Return true if the specified store with truncation is legal on this
1530 /// target.
1531 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1533 }
1534
  /// Return true if the specified store with truncation is legal or can be
  /// custom lowered on this target.
1537 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
    return isTypeLegal(ValVT) &&
1539 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1540 getTruncStoreAction(ValVT, MemVT) == Custom);
1541 }
1542
1543 virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
1544 bool LegalOnly) const {
1545 if (LegalOnly)
1546 return isTruncStoreLegal(ValVT, MemVT);
1547
1548 return isTruncStoreLegalOrCustom(ValVT, MemVT);
1549 }
1550
1551 /// Return how the indexed load should be treated: either it is legal, needs
1552 /// to be promoted to a larger size, needs to be expanded to some other code
1553 /// sequence, or the target has a custom expander for it.
1554 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1556 }
1557
1558 /// Return true if the specified indexed load is legal on this target.
1559 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1560 return VT.isSimple() &&
           (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1563 }
1564
1565 /// Return how the indexed store should be treated: either it is legal, needs
1566 /// to be promoted to a larger size, needs to be expanded to some other code
1567 /// sequence, or the target has a custom expander for it.
1568 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1570 }
1571
  /// Return true if the specified indexed store is legal on this target.
1573 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1574 return VT.isSimple() &&
           (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1577 }
1578
  /// Return how the indexed masked load should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
1582 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1584 }
1585
  /// Return true if the specified indexed masked load is legal on this
  /// target.
1587 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1588 return VT.isSimple() &&
           (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1591 }
1592
  /// Return how the indexed masked store should be treated: either it is
  /// legal, needs to be promoted to a larger size, needs to be expanded to
  /// some other code sequence, or the target has a custom expander for it.
1596 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
    return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1598 }
1599
  /// Return true if the specified indexed masked store is legal on this
  /// target.
1601 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1602 return VT.isSimple() &&
           (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
            getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1605 }
1606
  /// Returns true if the index type for a masked gather/scatter requires
  /// extending.
1609 virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
1610
  // Returns true if Extend can be folded into the index of a masked
  // gather/scatter on this target.
1613 virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
1614 return false;
1615 }
1616
1617 // Return true if the target supports a scatter/gather instruction with
1618 // indices which are scaled by the particular value. Note that all targets
1619 // must by definition support scale of 1.
1620 virtual bool isLegalScaleForGatherScatter(uint64_t Scale,
1621 uint64_t ElemSize) const {
1622 // MGATHER/MSCATTER are only required to support scaling by one or by the
1623 // element size.
1624 if (Scale != ElemSize && Scale != 1)
1625 return false;
1626 return true;
1627 }
1628
1629 /// Return how the condition code should be treated: either it is legal, needs
1630 /// to be expanded to some other code sequence, or the target has a custom
1631 /// expander for it.
1632 LegalizeAction
1633 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1634 assert((unsigned)CC < std::size(CondCodeActions) &&
1635 ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) &&
1636 "Table isn't big enough!");
1637 // See setCondCodeAction for how this is encoded.
1638 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1639 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1640 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1641 assert(Action != Promote && "Can't promote condition code!");
1642 return Action;
1643 }
1644
1645 /// Return true if the specified condition code is legal for a comparison of
1646 /// the specified types on this target.
1647 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1648 return getCondCodeAction(CC, VT) == Legal;
1649 }
1650
1651 /// Return true if the specified condition code is legal or custom for a
1652 /// comparison of the specified types on this target.
1653 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1654 return getCondCodeAction(CC, VT) == Legal ||
1655 getCondCodeAction(CC, VT) == Custom;
1656 }
1657
1658 /// Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type
1659 /// InputVT should be treated. Either it's legal, needs to be promoted to a
1660 /// larger size, needs to be expanded to some other code sequence, or the
1661 /// target has a custom expander for it.
1662 LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT,
1663 EVT InputVT) const {
1664 assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA ||
1665 Opc == ISD::PARTIAL_REDUCE_SUMLA);
1666 PartialReduceActionTypes Key = {Opc, AccVT.getSimpleVT().SimpleTy,
1667 InputVT.getSimpleVT().SimpleTy};
    auto It = PartialReduceMLAActions.find(Key);
1669 return It != PartialReduceMLAActions.end() ? It->second : Expand;
1670 }
1671
1672 /// Return true if a PARTIAL_REDUCE_U/SMLA node with the specified types is
1673 /// legal or custom for this target.
1674 bool isPartialReduceMLALegalOrCustom(unsigned Opc, EVT AccVT,
1675 EVT InputVT) const {
1676 LegalizeAction Action = getPartialReduceMLAAction(Opc, AccVT, InputVT);
1677 return Action == Legal || Action == Custom;
1678 }
1679
1680 /// If the action for this operation is to promote, this method returns the
1681 /// ValueType to promote to.
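  /// For example, if an operation is marked Promote for MVT::i16 and no
  /// explicit type was registered via AddPromotedToType, this returns the
  /// next larger legal type of the same kind for which the operation no
  /// longer needs promotion (commonly MVT::i32 for integers).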
1682 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1683 assert(getOperationAction(Op, VT) == Promote &&
1684 "This operation isn't promoted!");
1685
1686 // See if this has an explicit type specified.
1687 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1688 MVT::SimpleValueType>::const_iterator PTTI =
      PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1690 if (PTTI != PromoteToType.end()) return PTTI->second;
1691
1692 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1693 "Cannot autopromote this type, add it with AddPromotedToType.");
1694
1695 uint64_t VTBits = VT.getScalarSizeInBits();
1696 MVT NVT = VT;
1697 do {
1698 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1699 assert(NVT.isInteger() == VT.isInteger() &&
1700 NVT.isFloatingPoint() == VT.isFloatingPoint() &&
1701 "Didn't find type to promote to!");
    } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(NVT) ||
             getOperationAction(Op, NVT) == Promote);
1704 return NVT;
1705 }
1706
1707 virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
1708 bool AllowUnknown = false) const {
1709 return getValueType(DL, Ty, AllowUnknown);
1710 }
1711
1712 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1713 /// operations except for the pointer size. If AllowUnknown is true, this
1714 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1715 /// otherwise it will assert.
1716 EVT getValueType(const DataLayout &DL, Type *Ty,
1717 bool AllowUnknown = false) const {
1718 // Lower scalar pointers to native pointer types.
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerTy(DL, PTy->getAddressSpace());
1721
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1723 Type *EltTy = VTy->getElementType();
1724 // Lower vectors of pointers to native pointer types.
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1728 }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
1731 }
1732
    return EVT::getEVT(Ty, AllowUnknown);
1734 }
1735
1736 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1737 bool AllowUnknown = false) const {
1738 // Lower scalar pointers to native pointer types.
    if (auto *PTy = dyn_cast<PointerType>(Ty))
      return getPointerMemTy(DL, PTy->getAddressSpace());
1741
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1743 Type *EltTy = VTy->getElementType();
      if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
        EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
        EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1747 }
      return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
                              VTy->getElementCount());
1750 }
1751
1752 return getValueType(DL, Ty, AllowUnknown);
1753 }
1754
1755
1756 /// Return the MVT corresponding to this LLVM type. See getValueType.
1757 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1758 bool AllowUnknown = false) const {
1759 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1760 }
1761
1762 /// Returns the desired alignment for ByVal or InAlloca aggregate function
1763 /// arguments in the caller parameter area.
1764 virtual Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1765
1766 /// Return the type of registers that this ValueType will eventually require.
1767 MVT getRegisterType(MVT VT) const {
1768 assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT));
1769 return RegisterTypeForVT[VT.SimpleTy];
1770 }
1771
1772 /// Return the type of registers that this ValueType will eventually require.
1773 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1774 if (VT.isSimple())
      return getRegisterType(VT.getSimpleVT());
1776 if (VT.isVector()) {
1777 EVT VT1;
1778 MVT RegisterVT;
1779 unsigned NumIntermediates;
      (void)getVectorTypeBreakdown(Context, VT, VT1,
                                   NumIntermediates, RegisterVT);
1782 return RegisterVT;
1783 }
1784 if (VT.isInteger()) {
      return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1786 }
1787 llvm_unreachable("Unsupported extended type!");
1788 }
1789
1790 /// Return the number of registers that this ValueType will eventually
1791 /// require.
1792 ///
1793 /// This is one for any types promoted to live in larger registers, but may be
1794 /// more than one for types (like i64) that are split into pieces. For types
1795 /// like i140, which are first promoted then expanded, it is the number of
1796 /// registers needed to hold all the bits of the original type. For an i140
1797 /// on a 32 bit machine this means 5 registers.
1798 ///
1799 /// RegisterVT may be passed as a way to override the default settings, for
1800 /// instance with i128 inline assembly operands on SystemZ.
1801 virtual unsigned
1802 getNumRegisters(LLVMContext &Context, EVT VT,
1803 std::optional<MVT> RegisterVT = std::nullopt) const {
1804 if (VT.isSimple()) {
1805 assert((unsigned)VT.getSimpleVT().SimpleTy <
1806 std::size(NumRegistersForVT));
1807 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1808 }
1809 if (VT.isVector()) {
1810 EVT VT1;
1811 MVT VT2;
1812 unsigned NumIntermediates;
      return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1814 }
1815 if (VT.isInteger()) {
1816 unsigned BitWidth = VT.getSizeInBits();
1817 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1818 return (BitWidth + RegWidth - 1) / RegWidth;
1819 }
1820 llvm_unreachable("Unsupported extended type!");
1821 }
1822
1823 /// Certain combinations of ABIs, Targets and features require that types
1824 /// are legal for some operations and not for other operations.
1825 /// For MIPS all vector types must be passed through the integer register set.
1826 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1827 CallingConv::ID CC, EVT VT) const {
1828 return getRegisterType(Context, VT);
1829 }
1830
1831 /// Certain targets require unusual breakdowns of certain types. For MIPS,
  /// this occurs when a vector type is used, as vectors are passed through
  /// the integer register set.
1834 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1835 CallingConv::ID CC,
1836 EVT VT) const {
1837 return getNumRegisters(Context, VT);
1838 }
1839
1840 /// Certain targets have context sensitive alignment requirements, where one
1841 /// type has the alignment requirement of another type.
1842 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1843 const DataLayout &DL) const {
    return DL.getABITypeAlign(ArgTy);
1845 }
1846
1847 /// If true, then instruction selection should seek to shrink the FP constant
1848 /// of the specified type to a smaller type in order to save space and / or
1849 /// reduce runtime.
1850 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1851
1852 /// Return true if it is profitable to reduce a load to a smaller type.
1853 /// \p ByteOffset is only set if we know the pointer offset at compile time
1854 /// otherwise we should assume that additional pointer math is required.
  /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
  /// Example: (i16 (trunc (srl (i32 (load x)), 16))) -> i16 load x+2
1857 virtual bool shouldReduceLoadWidth(
1858 SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
1859 std::optional<unsigned> ByteOffset = std::nullopt) const {
1860 // By default, assume that it is cheaper to extract a subvector from a wide
1861 // vector load rather than creating multiple narrow vector loads.
1862 if (NewVT.isVector() && !SDValue(Load, 0).hasOneUse())
1863 return false;
1864
1865 return true;
1866 }
1867
1868 /// Return true (the default) if it is profitable to remove a sext_inreg(x)
1869 /// where the sext is redundant, and use x directly.
1870 virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
1871
1872 /// Indicates if any padding is guaranteed to go at the most significant bits
1873 /// when storing the type to memory and the type size isn't equal to the store
1874 /// size.
1875 bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const {
1876 return VT.isScalarInteger() && !VT.isByteSized();
1877 }
1878
1879 /// When splitting a value of the specified type into parts, does the Lo
1880 /// or Hi part come first? This usually follows the endianness, except
1881 /// for ppcf128, where the Hi part always comes first.
1882 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1883 return DL.isBigEndian() || VT == MVT::ppcf128;
1884 }
1885
1886 /// If true, the target has custom DAG combine transformations that it can
1887 /// perform for the specified node.
1888 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1889 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
1890 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1891 }
1892
1893 unsigned getGatherAllAliasesMaxDepth() const {
1894 return GatherAllAliasesMaxDepth;
1895 }
1896
1897 /// Returns the size of the platform's va_list object.
1898 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1899 return getPointerTy(DL).getSizeInBits();
1900 }
1901
1902 /// Get maximum # of store operations permitted for llvm.memset
1903 ///
1904 /// This function returns the maximum number of store operations permitted
1905 /// to replace a call to llvm.memset. The value is set by the target at the
1906 /// performance threshold for such a replacement. If OptSize is true,
1907 /// return the limit for functions that have OptSize attribute.
1908 unsigned getMaxStoresPerMemset(bool OptSize) const {
1909 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1910 }
1911
1912 /// Get maximum # of store operations permitted for llvm.memcpy
1913 ///
1914 /// This function returns the maximum number of store operations permitted
1915 /// to replace a call to llvm.memcpy. The value is set by the target at the
1916 /// performance threshold for such a replacement. If OptSize is true,
1917 /// return the limit for functions that have OptSize attribute.
1918 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1919 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1920 }
1921
1922 /// \brief Get maximum # of store operations to be glued together
1923 ///
1924 /// This function returns the maximum number of store operations permitted
1925 /// to glue together during lowering of llvm.memcpy. The value is set by
  /// the target at the performance threshold for such a replacement.
1927 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1928 return MaxGluedStoresPerMemcpy;
1929 }
1930
1931 /// Get maximum # of load operations permitted for memcmp
1932 ///
1933 /// This function returns the maximum number of load operations permitted
1934 /// to replace a call to memcmp. The value is set by the target at the
1935 /// performance threshold for such a replacement. If OptSize is true,
1936 /// return the limit for functions that have OptSize attribute.
1937 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1938 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1939 }
1940
1941 /// Get maximum # of store operations permitted for llvm.memmove
1942 ///
1943 /// This function returns the maximum number of store operations permitted
1944 /// to replace a call to llvm.memmove. The value is set by the target at the
1945 /// performance threshold for such a replacement. If OptSize is true,
1946 /// return the limit for functions that have OptSize attribute.
1947 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1948 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1949 }
1950
1951 /// Determine if the target supports unaligned memory accesses.
1952 ///
1953 /// This function returns true if the target allows unaligned memory accesses
1954 /// of the specified type in the given address space. If true, it also returns
1955 /// a relative speed of the unaligned memory access in the last argument by
1956 /// reference. The higher the speed number the faster the operation comparing
1957 /// to a number returned by another such call. This is used, for example, in
1958 /// situations where an array copy/move/set is converted to a sequence of
1959 /// store operations. Its use helps to ensure that such replacements don't
1960 /// generate code that causes an alignment error (trap) on the target machine.
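  ///
  /// As a sketch only (MyTargetLowering is a hypothetical target), a target
  /// that supports all unaligned scalar accesses of up to 64 bits at full
  /// speed might override this as:
  /// \code
  ///   bool MyTargetLowering::allowsMisalignedMemoryAccesses(
  ///       EVT VT, unsigned AddrSpace, Align Alignment,
  ///       MachineMemOperand::Flags Flags, unsigned *Fast) const {
  ///     if (!VT.isSimple() || VT.isVector() || VT.getSizeInBits() > 64)
  ///       return false;
  ///     if (Fast)
  ///       *Fast = 1; // Treat the access as fast as an aligned one.
  ///     return true;
  ///   }
  /// \endcode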
1961 virtual bool allowsMisalignedMemoryAccesses(
1962 EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1963 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1964 unsigned * /*Fast*/ = nullptr) const {
1965 return false;
1966 }
1967
1968 /// LLT handling variant.
1969 virtual bool allowsMisalignedMemoryAccesses(
1970 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1971 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1972 unsigned * /*Fast*/ = nullptr) const {
1973 return false;
1974 }
1975
1976 /// This function returns true if the memory access is aligned or if the
1977 /// target allows this specific unaligned memory access. If the access is
1978 /// allowed, the optional final parameter returns a relative speed of the
1979 /// access (as defined by the target).
1980 bool allowsMemoryAccessForAlignment(
1981 LLVMContext &Context, const DataLayout &DL, EVT VT,
1982 unsigned AddrSpace = 0, Align Alignment = Align(1),
1983 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1984 unsigned *Fast = nullptr) const;
1985
1986 /// Return true if the memory access of this type is aligned or if the target
1987 /// allows this specific unaligned access for the given MachineMemOperand.
1988 /// If the access is allowed, the optional final parameter returns a relative
1989 /// speed of the access (as defined by the target).
1990 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1991 const DataLayout &DL, EVT VT,
1992 const MachineMemOperand &MMO,
1993 unsigned *Fast = nullptr) const;
1994
1995 /// Return true if the target supports a memory access of this type for the
1996 /// given address space and alignment. If the access is allowed, the optional
1997 /// final parameter returns the relative speed of the access (as defined by
1998 /// the target).
1999 virtual bool
2000 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
2001 unsigned AddrSpace = 0, Align Alignment = Align(1),
2002 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
2003 unsigned *Fast = nullptr) const;
2004
2005 /// Return true if the target supports a memory access of this type for the
2006 /// given MachineMemOperand. If the access is allowed, the optional
2007 /// final parameter returns the relative access speed (as defined by the
2008 /// target).
2009 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
2010 const MachineMemOperand &MMO,
2011 unsigned *Fast = nullptr) const;
2012
2013 /// LLT handling variant.
2014 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
2015 const MachineMemOperand &MMO,
2016 unsigned *Fast = nullptr) const;
2017
2018 /// Returns the target specific optimal type for load and store operations as
2019 /// a result of memset, memcpy, and memmove lowering.
2020 /// It returns EVT::Other if the type should be determined using generic
2021 /// target-independent logic.
2022 virtual EVT
2023 getOptimalMemOpType(const MemOp &Op,
2024 const AttributeList & /*FuncAttributes*/) const {
2025 return MVT::Other;
2026 }
2027
2028 /// LLT returning variant.
2029 virtual LLT
2030 getOptimalMemOpLLT(const MemOp &Op,
2031 const AttributeList & /*FuncAttributes*/) const {
2032 return LLT();
2033 }
2034
2035 /// Returns true if it's safe to use load / store of the specified type to
2036 /// expand memcpy / memset inline.
2037 ///
2038 /// This is mostly true for all types except for some special cases. For
2039 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
2040 /// fstpl which also does type conversion. Note the specified type doesn't
2041 /// have to be legal as the hook is used before type legalization.
2042 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
2043
2044 /// Return lower limit for number of blocks in a jump table.
2045 virtual unsigned getMinimumJumpTableEntries() const;
2046
2047 /// Return lower limit of the density in a jump table.
2048 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
2049
2050 /// Return upper limit for number of entries in a jump table.
2051 /// Zero if no limit.
2052 unsigned getMaximumJumpTableSize() const;
2053
2054 virtual bool isJumpTableRelative() const;
2055
2056 /// If a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
2058 Register getStackPointerRegisterToSaveRestore() const {
2059 return StackPointerRegisterToSaveRestore;
2060 }
2061
2062 /// If a physical register, this returns the register that receives the
2063 /// exception address on entry to an EH pad.
2064 virtual Register
2065 getExceptionPointerRegister(const Constant *PersonalityFn) const {
2066 return Register();
2067 }
2068
2069 /// If a physical register, this returns the register that receives the
2070 /// exception typeid on entry to a landing pad.
2071 virtual Register
2072 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
2073 return Register();
2074 }
2075
2076 virtual bool needsFixedCatchObjects() const {
    report_fatal_error("Funclet EH is not implemented for this target");
2078 }
2079
2080 /// Return the minimum stack alignment of an argument.
2081 Align getMinStackArgumentAlignment() const {
2082 return MinStackArgumentAlignment;
2083 }
2084
2085 /// Return the minimum function alignment.
2086 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
2087
2088 /// Return the preferred function alignment.
2089 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
2090
2091 /// Return the preferred loop alignment.
2092 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;
2093
2094 /// Return the maximum amount of bytes allowed to be emitted when padding for
2095 /// alignment
2096 virtual unsigned
2097 getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const;
2098
2099 /// Should loops be aligned even when the function is marked OptSize (but not
2100 /// MinSize).
2101 virtual bool alignLoopsWithOptSize() const { return false; }
2102
2103 /// If the target has a standard location for the stack protector guard,
2104 /// returns the address of that location. Otherwise, returns nullptr.
2105 /// DEPRECATED: please override useLoadStackGuardNode and customize
2106 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
2107 virtual Value *getIRStackGuard(IRBuilderBase &IRB) const;
2108
2109 /// Inserts necessary declarations for SSP (stack protection) purpose.
2110 /// Should be used only when getIRStackGuard returns nullptr.
2111 virtual void insertSSPDeclarations(Module &M) const;
2112
2113 /// Return the variable that's previously inserted by insertSSPDeclarations,
2114 /// if any, otherwise return nullptr. Should be used only when
2115 /// getIRStackGuard returns nullptr.
2116 virtual Value *getSDagStackGuard(const Module &M) const;
2117
2118 /// If this function returns true, stack protection checks should XOR the
2119 /// frame pointer (or whichever pointer is used to address locals) into the
2120 /// stack guard value before checking it. getIRStackGuard must return nullptr
2121 /// if this returns true.
2122 virtual bool useStackGuardXorFP() const { return false; }
2123
2124 /// If the target has a standard stack protection check function that
2125 /// performs validation and error handling, returns the function. Otherwise,
2126 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
2127 /// Should be used only when getIRStackGuard returns nullptr.
2128 virtual Function *getSSPStackGuardCheck(const Module &M) const;
2129
2130protected:
2131 Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
2132 bool UseTLS) const;
2133
2134public:
2135 /// Returns the target-specific address of the unsafe stack pointer.
2136 virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const;
2137
  /// Returns true if the target has a symbol for emitting stack probes.
  virtual bool hasStackProbeSymbol(const MachineFunction &MF) const {
    return false;
  }

  virtual bool hasInlineStackProbe(const MachineFunction &MF) const {
    return false;
  }

  /// Returns the name of the symbol used to emit stack probes, or the empty
  /// string if not applicable.
  virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
    return "";
  }
2147
2148 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
2149 /// are happy to sink it into basic blocks. A cast may be free, but not
2150 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
2151 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
2152
2153 /// Return true if the pointer arguments to CI should be aligned by aligning
2154 /// the object whose address is being passed. If so then MinSize is set to the
2155 /// minimum size the object must be to be aligned and PrefAlign is set to the
2156 /// preferred alignment.
2157 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
2158 Align & /*PrefAlign*/) const {
2159 return false;
2160 }
2161
2162 //===--------------------------------------------------------------------===//
2163 /// \name Helpers for TargetTransformInfo implementations
2164 /// @{
2165
2166 /// Get the ISD node that corresponds to the Instruction class opcode.
2167 int InstructionOpcodeToISD(unsigned Opcode) const;
2168
2169 /// Get the ISD node that corresponds to the Intrinsic ID. Returns
2170 /// ISD::DELETED_NODE by default for an unsupported Intrinsic ID.
2171 int IntrinsicIDToISD(Intrinsic::ID ID) const;
2172
2173 /// @}
2174
2175 //===--------------------------------------------------------------------===//
2176 /// \name Helpers for atomic expansion.
2177 /// @{
2178
2179 /// Returns the maximum atomic operation size (in bits) supported by
2180 /// the backend. Atomic operations greater than this size (as well
2181 /// as ones that are not naturally aligned), will be expanded by
2182 /// AtomicExpandPass into an __atomic_* library call.
2183 unsigned getMaxAtomicSizeInBitsSupported() const {
2184 return MaxAtomicSizeInBitsSupported;
2185 }
2186
2187 /// Returns the size in bits of the maximum div/rem the backend supports.
2188 /// Larger operations will be expanded by ExpandLargeDivRem.
2189 unsigned getMaxDivRemBitWidthSupported() const {
2190 return MaxDivRemBitWidthSupported;
2191 }
2192
2193 /// Returns the size in bits of the maximum fp to/from int conversion the
2194 /// backend supports. Larger operations will be expanded by ExpandFp.
2195 unsigned getMaxLargeFPConvertBitWidthSupported() const {
2196 return MaxLargeFPConvertBitWidthSupported;
2197 }
2198
2199 /// Returns the size of the smallest cmpxchg or ll/sc instruction
2200 /// the backend supports. Any smaller operations are widened in
2201 /// AtomicExpandPass.
2202 ///
2203 /// Note that *unlike* operations above the maximum size, atomic ops
2204 /// are still natively supported below the minimum; they just
2205 /// require a more complex expansion.
2206 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
2207
2208 /// Whether the target supports unaligned atomic operations.
2209 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
2210
2211 /// Whether AtomicExpandPass should automatically insert fences and reduce
2212 /// ordering for this atomic. This should be true for most architectures with
2213 /// weak memory ordering. Defaults to false.
2214 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
2215 return false;
2216 }
2217
  // The memory ordering that AtomicExpandPass should assign to an atomic
2219 // instruction that it has lowered by adding fences. This can be used
2220 // to "fold" one of the fences into the atomic instruction.
2221 virtual AtomicOrdering
2222 atomicOperationOrderAfterFenceSplit(const Instruction *I) const {
2223 return AtomicOrdering::Monotonic;
2224 }
2225
2226 /// Whether AtomicExpandPass should automatically insert a trailing fence
2227 /// without reducing the ordering for this atomic. Defaults to false.
2228 virtual bool
2229 shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
2230 return false;
2231 }
2232
2233 /// Perform a load-linked operation on Addr, returning a "Value *" with the
2234 /// corresponding pointee type. This may entail some non-trivial operations to
2235 /// truncate or reconstruct types that will be illegal in the backend. See
2236 /// ARMISelLowering for an example implementation.
2237 virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
2238 Value *Addr, AtomicOrdering Ord) const {
2239 llvm_unreachable("Load linked unimplemented on this target");
2240 }
2241
2242 /// Perform a store-conditional operation to Addr. Return the status of the
2243 /// store. This should be 0 if the store succeeded, non-zero otherwise.
2244 virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val,
2245 Value *Addr, AtomicOrdering Ord) const {
2246 llvm_unreachable("Store conditional unimplemented on this target");
2247 }
2248
2249 /// Perform a masked atomicrmw using a target-specific intrinsic. This
2250 /// represents the core LL/SC loop which will be lowered at a late stage by
2251 /// the backend. The target-specific intrinsic returns the loaded value and
2252 /// is not responsible for masking and shifting the result.
2253 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
2254 AtomicRMWInst *AI,
2255 Value *AlignedAddr, Value *Incr,
2256 Value *Mask, Value *ShiftAmt,
2257 AtomicOrdering Ord) const {
2258 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
2259 }
2260
  /// Perform an atomicrmw expansion in a target-specific way. This is
  /// expected to be called when masked atomicrmw and bit test atomicrmw don't
  /// work, and the target supports another way to lower atomicrmw.
2264 virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const {
2265 llvm_unreachable(
2266 "Generic atomicrmw expansion unimplemented on this target");
2267 }
2268
2269 /// Perform a cmpxchg expansion using a target-specific method.
2270 virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
2271 llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
2272 }
2273
2274 /// Perform a bit test atomicrmw using a target-specific intrinsic. This
2275 /// represents the combined bit test intrinsic which will be lowered at a late
2276 /// stage by the backend.
2277 virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2278 llvm_unreachable(
2279 "Bit test atomicrmw expansion unimplemented on this target");
2280 }
2281
  /// Perform an atomicrmw whose result is only used by a comparison, using a
  /// target-specific intrinsic. This represents the combined atomic and
  /// compare intrinsic which will be lowered at a late stage by the backend.
2285 virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2286 llvm_unreachable(
2287 "Compare arith atomicrmw expansion unimplemented on this target");
2288 }
2289
2290 /// Perform a masked cmpxchg using a target-specific intrinsic. This
2291 /// represents the core LL/SC loop which will be lowered at a late stage by
2292 /// the backend. The target-specific intrinsic returns the loaded value and
2293 /// is not responsible for masking and shifting the result.
2294 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
2295 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2296 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2297 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
2298 }
2299
2300 //===--------------------------------------------------------------------===//
2301 /// \name KCFI check lowering.
2302 /// @{
2303
2304 virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
2305 MachineBasicBlock::instr_iterator &MBBI,
2306 const TargetInstrInfo *TII) const {
2307 llvm_unreachable("KCFI is not supported on this target");
2308 }
2309
2310 /// @}
2311
2312 /// Inserts in the IR a target-specific intrinsic specifying a fence.
2313 /// It is called by AtomicExpandPass before expanding an
2314 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
2315 /// if shouldInsertFencesForAtomic returns true.
2316 ///
2317 /// Inst is the original atomic instruction, prior to other expansions that
2318 /// may be performed.
2319 ///
2320 /// This function should either return a nullptr, or a pointer to an IR-level
2321 /// Instruction*. Even complex fence sequences can be represented by a
2322 /// single Instruction* through an intrinsic to be lowered later.
2323 ///
2324 /// The default implementation emits an IR fence before any release (or
2325 /// stronger) operation that stores, and after any acquire (or stronger)
2326 /// operation. This is generally a correct implementation, but backends may
2327 /// override if they wish to use alternative schemes (e.g. the PowerPC
2328 /// standard ABI uses a fence before a seq_cst load instead of after a
2329 /// seq_cst store).
2330 /// @{
2331 virtual Instruction *emitLeadingFence(IRBuilderBase &Builder,
2332 Instruction *Inst,
2333 AtomicOrdering Ord) const;
2334
2335 virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
2336 Instruction *Inst,
2337 AtomicOrdering Ord) const;
2338 /// @}
2339
2340 // Emits code that executes when the comparison result in the ll/sc
2341 // expansion of a cmpxchg instruction is such that the store-conditional will
2342 // not execute. This makes it possible to balance out the load-linked with
2343 // a dedicated instruction, if desired.
2344 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
2345 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
2346 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {}
2347
2348 /// Returns true if arguments should be sign-extended in lib calls.
2349 virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const {
2350 return IsSigned;
2351 }
2352
2353 /// Returns true if arguments should be extended in lib calls.
2354 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
2355 return true;
2356 }
2357
2358 /// Returns how the given (atomic) load should be expanded by the
2359 /// IR-level AtomicExpand pass.
2360 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2361 return AtomicExpansionKind::None;
2362 }
2363
2364 /// Returns how the given (atomic) load should be cast by the IR-level
2365 /// AtomicExpand pass.
2366 virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const {
2367 if (LI->getType()->isFloatingPointTy())
2368 return AtomicExpansionKind::CastToInteger;
2369 return AtomicExpansionKind::None;
2370 }
2371
2372 /// Returns how the given (atomic) store should be expanded by the IR-level
  /// AtomicExpand pass. For instance, AtomicExpansionKind::Expand will try
2374 /// to use an atomicrmw xchg.
2375 virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
2376 return AtomicExpansionKind::None;
2377 }
2378
2379 /// Returns how the given (atomic) store should be cast by the IR-level
  /// AtomicExpand pass. For instance, AtomicExpansionKind::CastToInteger
2381 /// will try to cast the operands to integer values.
2382 virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const {
2383 if (SI->getValueOperand()->getType()->isFloatingPointTy())
2384 return AtomicExpansionKind::CastToInteger;
2385 return AtomicExpansionKind::None;
2386 }
2387
2388 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
2389 /// AtomicExpand pass.
2390 virtual AtomicExpansionKind
2391 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
2392 return AtomicExpansionKind::None;
2393 }
2394
2395 /// Returns how the IR-level AtomicExpand pass should expand the given
2396 /// AtomicRMW, if at all. Default is to never expand.
2397 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2398 return RMW->isFloatingPointOperation() ?
2399 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
2400 }
2401
2402 /// Returns how the given atomic atomicrmw should be cast by the IR-level
2403 /// AtomicExpand pass.
2404 virtual AtomicExpansionKind
2405 shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const {
2406 if (RMWI->getOperation() == AtomicRMWInst::Xchg &&
2407 (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
2408 RMWI->getValOperand()->getType()->isPointerTy()))
2409 return AtomicExpansionKind::CastToInteger;
2410
2411 return AtomicExpansionKind::None;
2412 }
2413
2414 /// On some platforms, an AtomicRMW that never actually modifies the value
2415 /// (such as fetch_add of 0) can be turned into a fence followed by an
2416 /// atomic load. This may sound useless, but it makes it possible for the
2417 /// processor to keep the cacheline shared, dramatically improving
2418 /// performance. And such idempotent RMWs are useful for implementing some
2419 /// kinds of locks, see for example (justification + benchmarks):
2420 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
2421 /// This method tries doing that transformation, returning the atomic load if
2422 /// it succeeds, and nullptr otherwise.
2423 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
2424 /// another round of expansion.
2425 virtual LoadInst *
2426 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
2427 return nullptr;
2428 }
2429
2430 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
2431 /// SIGN_EXTEND, or ANY_EXTEND).
2432 virtual ISD::NodeType getExtendForAtomicOps() const {
2433 return ISD::ZERO_EXTEND;
2434 }
2435
2436 /// Returns how the platform's atomic compare and swap expects its comparison
2437 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
2438 /// separate from getExtendForAtomicOps, which is concerned with the
2439 /// sign-extension of the instruction's output, whereas here we are concerned
2440 /// with the sign-extension of the input. For targets with compare-and-swap
2441 /// instructions (or sub-word comparisons in their LL/SC loop expansions),
2442 /// the input can be ANY_EXTEND, but the output will still have a specific
2443 /// extension.
2444 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
2445 return ISD::ANY_EXTEND;
2446 }
2447
2448 /// @}
2449
2450 /// Returns true if we should normalize
2451 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2453 /// that it saves us from materializing N0 and N1 in an integer register.
2454 /// Targets that are able to perform and/or on flags should return false here.
2455 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2456 EVT VT) const {
2457 // If a target has multiple condition registers, then it likely has logical
2458 // operations on those registers.
2459 if (hasMultipleConditionRegisters())
2460 return false;
2461 // Only do the transform if the value won't be split into multiple
2462 // registers.
2463 LegalizeTypeAction Action = getTypeAction(Context, VT);
2464 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
2465 Action != TypeSplitVector;
2466 }
2467
2468 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
2469
2470 /// Return true if a select of constants (select Cond, C1, C2) should be
2471 /// transformed into simple math ops with the condition value. For example:
2472 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
2473 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
2474 return false;
2475 }
2476
2477 /// Return true if it is profitable to transform an integer
2478 /// multiplication-by-constant into simpler operations like shifts and adds.
2479 /// This may be true if the target does not directly support the
2480 /// multiplication operation for the specified type or the sequence of simpler
2481 /// ops is faster than the multiply.
2482 virtual bool decomposeMulByConstant(LLVMContext &Context,
2483 EVT VT, SDValue C) const {
2484 return false;
2485 }
2486
2487 /// Return true if it may be profitable to transform
2488 /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
2489 /// This may not be true if c1 and c2 can be represented as immediates but
2490 /// c1*c2 cannot, for example.
2491 /// The target should check if c1, c2 and c1*c2 can be represented as
2492 /// immediates, or have to be materialized into registers. If it is not sure
2493 /// about some cases, a default true can be returned to let the DAGCombiner
2494 /// decide.
2495 /// AddNode is (add x, c1), and ConstNode is c2.
2496 virtual bool isMulAddWithConstProfitable(SDValue AddNode,
2497 SDValue ConstNode) const {
2498 return true;
2499 }
2500
2501 /// Return true if it is more correct/profitable to use strict FP_TO_INT
2502 /// conversion operations - canonicalizing the FP source value instead of
2503 /// converting all cases and then selecting based on value.
2504 /// This may be true if the target throws exceptions for out of bounds
2505 /// conversions or has fast FP CMOV.
2506 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
2507 bool IsSigned) const {
2508 return false;
2509 }
2510
2511 /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic.
2512 /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always
2513 /// considered beneficial.
  /// If optimizing for size, expansion is only considered beneficial for up
  /// to 5 multiplies and a divide (if the exponent is negative).
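  ///
  /// For example, for Exponent = -5 this checks E = 5: popcount(5) + Log2(5)
  /// = 2 + 2 = 4 < 7, so the expansion (two squarings, one extra multiply,
  /// and a divide for the negative exponent) is still considered beneficial
  /// when optimizing for size.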
2516 bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const {
2517 if (Exponent < 0)
2518 Exponent = -Exponent;
2519 uint64_t E = static_cast<uint64_t>(Exponent);
    return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7);
2521 }
2522
2523 //===--------------------------------------------------------------------===//
2524 // TargetLowering Configuration Methods - These methods should be invoked by
2525 // the derived class constructor to configure this object for the target.
2526 //
2527protected:
2528 /// Specify how the target extends the result of integer and floating point
2529 /// boolean values from i1 to a wider type. See getBooleanContents.
2530 void setBooleanContents(BooleanContent Ty) {
2531 BooleanContents = Ty;
2532 BooleanFloatContents = Ty;
2533 }
2534
2535 /// Specify how the target extends the result of integer and floating point
2536 /// boolean values from i1 to a wider type. See getBooleanContents.
2537 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
2538 BooleanContents = IntTy;
2539 BooleanFloatContents = FloatTy;
2540 }
2541
2542 /// Specify how the target extends the result of a vector boolean value from a
2543 /// vector of i1 to a wider type. See getBooleanContents.
2544 void setBooleanVectorContents(BooleanContent Ty) {
2545 BooleanVectorContents = Ty;
2546 }
2547
2548 /// Specify the target scheduling preference.
2549 void setSchedulingPreference(Sched::Preference Pref) {
2550 SchedPreferenceInfo = Pref;
2551 }
2552
2553 /// Indicate the minimum number of blocks to generate jump tables.
2554 void setMinimumJumpTableEntries(unsigned Val);
2555
2556 /// Indicate the maximum number of entries in jump tables.
2557 /// Set to zero to generate unlimited jump tables.
2558 void setMaximumJumpTableSize(unsigned);
2559
2560 /// If set to a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
2562 void setStackPointerRegisterToSaveRestore(Register R) {
2563 StackPointerRegisterToSaveRestore = R;
2564 }
2565
2566 /// Tells the code generator that the target has multiple (allocatable)
2567 /// condition registers that can be used to store the results of comparisons
2568 /// for use by selects and conditional branches. With multiple condition
2569 /// registers, the code generator will not aggressively sink comparisons into
2570 /// the blocks of their users.
2571 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2572 HasMultipleConditionRegisters = hasManyRegs;
2573 }
2574
2575 /// Tells the code generator that the target has BitExtract instructions.
2576 /// The code generator will aggressively sink "shift"s into the blocks of
2577 /// their users if the users will generate "and" instructions which can be
2578 /// combined with "shift" to BitExtract instructions.
2579 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2580 HasExtractBitsInsn = hasExtractInsn;
2581 }
2582
2583 /// Tells the code generator not to expand logic operations on comparison
2584 /// predicates into separate sequences that increase the amount of flow
2585 /// control.
2586 void setJumpIsExpensive(bool isExpensive = true);
2587
2588 /// Tells the code generator which bitwidths to bypass.
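  /// For example, a target with a slow 64-bit divider but a fast 32-bit one
  /// might call addBypassSlowDiv(64, 32), so that 64-bit divisions whose
  /// operands happen to fit in 32 bits are dispatched to the faster
  /// instruction at run time.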
2589 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2590 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2591 }
2592
2593 /// Add the specified register class as an available regclass for the
2594 /// specified value type. This indicates the selector can handle values of
2595 /// that class natively.
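  ///
  /// Typically called from the target's TargetLowering constructor, for
  /// example (GPR32RegClass is a hypothetical register class name):
  /// \code
  ///   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
  /// \endcode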
2596 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2597 assert((unsigned)VT.SimpleTy < std::size(RegClassForVT));
2598 RegClassForVT[VT.SimpleTy] = RC;
2599 }
2600
2601 /// Return the largest legal super-reg register class of the register class
2602 /// for the specified type and its associated "cost".
2603 virtual std::pair<const TargetRegisterClass *, uint8_t>
2604 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2605
2606 /// Once all of the register classes are added, this allows us to compute
2607 /// derived properties we expose.
2608 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2609
2610 /// Indicate that the specified operation does not work with the specified
2611 /// type and indicate what to do about it. Note that VT may refer to either
2612 /// the type of a result or that of an operand of Op.
2613 void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
2614 assert(Op < std::size(OpActions[0]) && "Table isn't big enough!");
2615 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2616 }
2617 void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
2618 LegalizeAction Action) {
2619 for (auto Op : Ops)
2620 setOperationAction(Op, VT, Action);
2621 }
2622 void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs,
2623 LegalizeAction Action) {
2624 for (auto VT : VTs)
2625 setOperationAction(Ops, VT, Action);
2626 }
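
// For illustration, a hypothetical target constructor might mark a single
// operation, or several operations at once (the choices are examples only):
//   setOperationAction(ISD::SDIV, MVT::i64, Expand);
//   setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);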
2627
2628 /// Indicate that the specified load with extension does not work with the
2629 /// specified type and indicate what to do about it.
2630 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2631 LegalizeAction Action) {
2632 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2633 MemVT.isValid() && "Table isn't big enough!");
2634 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2635 unsigned Shift = 4 * ExtType;
2636 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2637 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2638 }
2639 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2640 LegalizeAction Action) {
2641 for (auto ExtType : ExtTypes)
2642 setLoadExtAction(ExtType, ValVT, MemVT, Action);
2643 }
2644 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2645 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2646 for (auto MemVT : MemVTs)
2647 setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2648 }
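
// For illustration, a hypothetical target lacking extending loads from f32
// and i16 might write:
//   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
//   setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, MVT::i16,
//                    Expand);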
2649
2650 /// Let target indicate that an extending atomic load of the specified type
2651 /// is legal.
2652 void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2653 LegalizeAction Action) {
2654 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2655 MemVT.isValid() && "Table isn't big enough!");
2656 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2657 unsigned Shift = 4 * ExtType;
2658 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &=
2659 ~((uint16_t)0xF << Shift);
2660 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |=
2661 ((uint16_t)Action << Shift);
2662 }
2663 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2664 LegalizeAction Action) {
2665 for (auto ExtType : ExtTypes)
2666 setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action);
2667 }
2668 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2669 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2670 for (auto MemVT : MemVTs)
2671 setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2672 }
2673
2674 /// Indicate that the specified truncating store does not work with the
2675 /// specified type and indicate what to do about it.
2676 void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
2677 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2678 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2679 }
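
// For illustration, a hypothetical target without a truncating f64 -> f32
// store would write:
//   setTruncStoreAction(MVT::f64, MVT::f32, Expand);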
2680
2681 /// Indicate that the specified indexed load does or does not work with the
2682 /// specified type and indicate what to do about it.
2683 ///
2684 /// NOTE: All indexed mode loads are initialized to Expand in
2685 /// TargetLowering.cpp
2686 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT,
2687 LegalizeAction Action) {
2688 for (auto IdxMode : IdxModes)
2689 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2690 }
2691
2692 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2693 LegalizeAction Action) {
2694 for (auto VT : VTs)
2695 setIndexedLoadAction(IdxModes, VT, Action);
2696 }
2697
2698 /// Indicate that the specified indexed store does or does not work with the
2699 /// specified type and indicate what to do about it.
2700 ///
2701 /// NOTE: All indexed mode stores are initialized to Expand in
2702 /// TargetLowering.cpp
2703 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT,
2704 LegalizeAction Action) {
2705 for (auto IdxMode : IdxModes)
2706 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2707 }
2708
2709 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2710 LegalizeAction Action) {
2711 for (auto VT : VTs)
2712 setIndexedStoreAction(IdxModes, VT, Action);
2713 }
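
// For illustration, a hypothetical target with pre/post-increment loads and
// stores for i32 might write:
//   setIndexedLoadAction({ISD::PRE_INC, ISD::POST_INC}, MVT::i32, Legal);
//   setIndexedStoreAction({ISD::PRE_INC, ISD::POST_INC}, MVT::i32, Legal);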
2714
2715 /// Indicate that the specified indexed masked load does or does not work with
2716 /// the specified type and indicate what to do about it.
2717 ///
2718 /// NOTE: All indexed mode masked loads are initialized to Expand in
2719 /// TargetLowering.cpp
2720 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2721 LegalizeAction Action) {
2722 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2723 }
2724
2725 /// Indicate that the specified indexed masked store does or does not work
2726 /// with the specified type and indicate what to do about it.
2727 ///
2728 /// NOTE: All indexed mode masked stores are initialized to Expand in
2729 /// TargetLowering.cpp
2730 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2731 LegalizeAction Action) {
2732 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2733 }
2734
2735 /// Indicate that the specified condition code is or isn't supported on the
2736 /// target and indicate what to do about it.
2737 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT,
2738 LegalizeAction Action) {
2739 for (auto CC : CCs) {
2740 assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) &&
2741 "Table isn't big enough!");
2742 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2743 /// The lower 3 bits of SimpleTy select one of the eight 4-bit fields in
2744 /// a 32-bit value, and the remaining upper bits index into the second
2745 /// dimension of the array to select which 32-bit value to use.
2746 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2747 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2748 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2749 }
2750 }
2751 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs,
2752 LegalizeAction Action) {
2753 for (auto VT : VTs)
2754 setCondCodeAction(CCs, VT, Action);
2755 }
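
// For illustration, a hypothetical target that cannot directly lower some
// unordered FP predicates might write:
//   setCondCodeAction({ISD::SETUGT, ISD::SETULT}, MVT::f32, Expand);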
2756
2757 /// Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input
2758 /// type InputVT should be treated by the target. Either it's legal, needs to
2759 /// be promoted to a larger size, needs to be expanded to some other code
2760 /// sequence, or the target has a custom expander for it.
2761 void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT,
2762 LegalizeAction Action) {
2763 assert(Opc == ISD::PARTIAL_REDUCE_SMLA || Opc == ISD::PARTIAL_REDUCE_UMLA ||
2764 Opc == ISD::PARTIAL_REDUCE_SUMLA);
2765 assert(AccVT.isValid() && InputVT.isValid() &&
2766 "setPartialReduceMLAAction types aren't valid");
2767 PartialReduceActionTypes Key = {Opc, AccVT.SimpleTy, InputVT.SimpleTy};
2768 PartialReduceMLAActions[Key] = Action;
2769 }
2770 void setPartialReduceMLAAction(ArrayRef<unsigned> Opcodes, MVT AccVT,
2771 MVT InputVT, LegalizeAction Action) {
2772 for (unsigned Opc : Opcodes)
2773 setPartialReduceMLAAction(Opc, AccVT, InputVT, Action);
2774 }
2775
2776 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2777 /// to trying a larger integer/fp until it can find one that works. If that
2778 /// default is insufficient, this method can be used by the target to override
2779 /// the default.
2780 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2781 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2782 }
2783
2784 /// Convenience method to set an operation to Promote and specify the type
2785 /// in a single call.
2786 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2787 setOperationAction(Opc, OrigVT, Promote);
2788 AddPromotedToType(Opc, OrigVT, DestVT);
2789 }
2790 void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT,
2791 MVT DestVT) {
2792 for (auto Op : Ops) {
2793 setOperationAction(Op, OrigVT, Promote);
2794 AddPromotedToType(Op, OrigVT, DestVT);
2795 }
2796 }
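
// For illustration, a hypothetical target that performs all vector bitwise
// logic on 64-bit elements could promote narrower-element forms:
//   setOperationPromotedToType(ISD::AND, MVT::v4i32, MVT::v2i64);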
2797
2798 /// Targets should invoke this method for each target independent node that
2799 /// they want to provide a custom DAG combiner for by implementing the
2800 /// PerformDAGCombine virtual method.
2801 void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
2802 for (auto NT : NTs) {
2803 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
2804 TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7);
2805 }
2806 }
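
// For illustration, a hypothetical target that wants PerformDAGCombine
// callbacks for adds and shifts would write:
//   setTargetDAGCombine({ISD::ADD, ISD::SHL});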
2807
2808 /// Set the target's minimum function alignment.
2809 void setMinFunctionAlignment(Align Alignment) {
2810 MinFunctionAlignment = Alignment;
2811 }
2812
2813 /// Set the target's preferred function alignment. This should be set if
2814 /// there is a performance benefit to higher-than-minimum alignment.
2815 void setPrefFunctionAlignment(Align Alignment) {
2816 PrefFunctionAlignment = Alignment;
2817 }
2818
2819 /// Set the target's preferred loop alignment. The default alignment of one
2820 /// means the target does not care about loop alignment. The target may also
2821 /// override getPrefLoopAlignment to provide per-loop values.
2822 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2823 void setMaxBytesForAlignment(unsigned MaxBytes) {
2824 MaxBytesForAlignment = MaxBytes;
2825 }
2826
2827 /// Set the minimum stack alignment of an argument.
2828 void setMinStackArgumentAlignment(Align Alignment) {
2829 MinStackArgumentAlignment = Alignment;
2830 }
2831
2832 /// Set the maximum atomic operation size supported by the
2833 /// backend. Atomic operations greater than this size (as well as
2834 /// ones that are not naturally aligned), will be expanded by
2835 /// AtomicExpandPass into an __atomic_* library call.
2836 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2837 MaxAtomicSizeInBitsSupported = SizeInBits;
2838 }
2839
2840 /// Set the size in bits of the maximum div/rem the backend supports.
2841 /// Larger operations will be expanded by ExpandLargeDivRem.
2842 void setMaxDivRemBitWidthSupported(unsigned SizeInBits) {
2843 MaxDivRemBitWidthSupported = SizeInBits;
2844 }
2845
2846 /// Set the size in bits of the maximum fp to/from int conversion the backend
2847 /// supports. Larger operations will be expanded by ExpandFp.
2848 void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
2849 MaxLargeFPConvertBitWidthSupported = SizeInBits;
2850 }
2851
2852 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2853 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2854 MinCmpXchgSizeInBits = SizeInBits;
2855 }
2856
2857 /// Sets whether unaligned atomic operations are supported.
2858 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2859 SupportsUnalignedAtomics = UnalignedSupported;
2860 }
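
// For illustration, a hypothetical 64-bit target whose widest atomic and
// div/rem operations are 64 bits, with 32-bit cmpxchg as the narrowest,
// might write:
//   setMaxAtomicSizeInBitsSupported(64);
//   setMaxDivRemBitWidthSupported(64);
//   setMinCmpXchgSizeInBits(32);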
2861
2862public:
2863 //===--------------------------------------------------------------------===//
2864 // Addressing mode description hooks (used by LSR etc).
2865 //
2866
2867 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2868 /// instructions reading the address. This allows as much computation as
2869 /// possible to be done in the address mode for that operand. This hook lets
2870 /// targets also pass back when this should be done on intrinsics which
2871 /// load/store.
2872 virtual bool getAddrModeArguments(const IntrinsicInst * /*I*/,
2873 SmallVectorImpl<Value *> & /*Ops*/,
2874 Type *& /*AccessTy*/) const {
2875 return false;
2876 }
2877
2878 /// This represents an addressing mode of:
2879 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
2880 /// If BaseGV is null, there is no BaseGV.
2881 /// If BaseOffs is zero, there is no base offset.
2882 /// If HasBaseReg is false, there is no base register.
2883 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2884 /// no scale.
2885 /// If ScalableOffset is zero, there is no scalable offset.
2886 struct AddrMode {
2887 GlobalValue *BaseGV = nullptr;
2888 int64_t BaseOffs = 0;
2889 bool HasBaseReg = false;
2890 int64_t Scale = 0;
2891 int64_t ScalableOffset = 0;
2892 AddrMode() = default;
2893 };
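
// For illustration, the address "GV + 16 + BaseReg + 4*ScaleReg" would be
// described for a legality query roughly as (GV stands for some GlobalValue*):
//   AddrMode AM;
//   AM.BaseGV = GV;
//   AM.BaseOffs = 16;
//   AM.HasBaseReg = true;
//   AM.Scale = 4;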
2894
2895 /// Return true if the addressing mode represented by AM is legal for this
2896 /// target, for a load/store of the specified type.
2897 ///
2898 /// The type may be VoidTy, in which case only return true if the addressing
2899 /// mode is legal for a load/store of any legal type. TODO: Handle
2900 /// pre/postinc as well.
2901 ///
2902 /// If the address space cannot be determined, it will be -1.
2903 ///
2904 /// TODO: Remove default argument
2905 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2906 Type *Ty, unsigned AddrSpace,
2907 Instruction *I = nullptr) const;
2908
2909 /// Returns true if the target's addressing mode can target thread local
2910 /// storage (TLS).
2911 virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
2912 return false;
2913 }
2914
2915 /// Return the preferred common base offset.
2916 virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
2917 int64_t MaxOffset) const {
2918 return 0;
2919 }
2920
2921 /// Return true if the specified immediate is a legal icmp immediate, that is,
2922 /// the target has icmp instructions which can compare a register against the
2923 /// immediate without having to materialize the immediate into a register.
2924 virtual bool isLegalICmpImmediate(int64_t) const {
2925 return true;
2926 }
2927
2928 /// Return true if the specified immediate is a legal add immediate, that is, the
2929 /// target has add instructions which can add a register with the immediate
2930 /// without having to materialize the immediate into a register.
2931 virtual bool isLegalAddImmediate(int64_t) const {
2932 return true;
2933 }
2934
2935 /// Return true if adding the specified scalable immediate is legal, that is
2936 /// the target has add instructions which can add a register with the
2937 /// immediate (multiplied by vscale) without having to materialize the
2938 /// immediate into a register.
2939 virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
2940
2941 /// Return true if the specified immediate is legal for the value input of a
2942 /// store instruction.
2943 virtual bool isLegalStoreImmediate(int64_t Value) const {
2944 // Default implementation assumes that at least 0 works since it is likely
2945 // that a zero register exists or a zero immediate is allowed.
2946 return Value == 0;
2947 }
2948
2949 /// Given a shuffle vector SVI representing a vector splat, return a new
2950 /// scalar type of size equal to SVI's scalar type if the new type is more
2951 /// profitable. Returns nullptr otherwise. For example under MVE float splats
2952 /// are converted to integer to prevent the need to move from SPR to GPR
2953 /// registers.
2954 virtual Type *shouldConvertSplatType(ShuffleVectorInst *SVI) const {
2955 return nullptr;
2956 }
2957
2958 /// Given a set of interconnected phis of type 'From' that are loaded/stored
2959 /// or bitcast to type 'To', return true if the set should be converted to
2960 /// 'To'.
2961 virtual bool shouldConvertPhiType(Type *From, Type *To) const {
2962 return (From->isIntegerTy() || From->isFloatingPointTy()) &&
2963 (To->isIntegerTy() || To->isFloatingPointTy());
2964 }
2965
2966 /// Returns true if the opcode is a commutative binary operation.
2967 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2968 // FIXME: This should get its info from the td file.
2969 switch (Opcode) {
2970 case ISD::ADD:
2971 case ISD::SMIN:
2972 case ISD::SMAX:
2973 case ISD::UMIN:
2974 case ISD::UMAX:
2975 case ISD::MUL:
2976 case ISD::MULHU:
2977 case ISD::MULHS:
2978 case ISD::SMUL_LOHI:
2979 case ISD::UMUL_LOHI:
2980 case ISD::FADD:
2981 case ISD::FMUL:
2982 case ISD::AND:
2983 case ISD::OR:
2984 case ISD::XOR:
2985 case ISD::SADDO:
2986 case ISD::UADDO:
2987 case ISD::ADDC:
2988 case ISD::ADDE:
2989 case ISD::SADDSAT:
2990 case ISD::UADDSAT:
2991 case ISD::FMINNUM:
2992 case ISD::FMAXNUM:
2993 case ISD::FMINNUM_IEEE:
2994 case ISD::FMAXNUM_IEEE:
2995 case ISD::FMINIMUM:
2996 case ISD::FMAXIMUM:
2997 case ISD::FMINIMUMNUM:
2998 case ISD::FMAXIMUMNUM:
2999 case ISD::AVGFLOORS:
3000 case ISD::AVGFLOORU:
3001 case ISD::AVGCEILS:
3002 case ISD::AVGCEILU:
3003 case ISD::ABDS:
3004 case ISD::ABDU:
3005 return true;
3006 default: return false;
3007 }
3008 }
3009
3010 /// Return true if the node is a math/logic binary operator.
3011 virtual bool isBinOp(unsigned Opcode) const {
3012 // A commutative binop must be a binop.
3013 if (isCommutativeBinOp(Opcode))
3014 return true;
3015 // These are non-commutative binops.
3016 switch (Opcode) {
3017 case ISD::SUB:
3018 case ISD::SHL:
3019 case ISD::SRL:
3020 case ISD::SRA:
3021 case ISD::ROTL:
3022 case ISD::ROTR:
3023 case ISD::SDIV:
3024 case ISD::UDIV:
3025 case ISD::SREM:
3026 case ISD::UREM:
3027 case ISD::SSUBSAT:
3028 case ISD::USUBSAT:
3029 case ISD::FSUB:
3030 case ISD::FDIV:
3031 case ISD::FREM:
3032 return true;
3033 default:
3034 return false;
3035 }
3036 }
3037
3038 /// Return true if it's free to truncate a value of type FromTy to type
3039 /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
3040 /// by referencing its sub-register AX.
3041 /// Targets must return false when FromTy <= ToTy.
3042 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
3043 return false;
3044 }
3045
3046 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
3047 /// whether a call is in tail position. Typically this means that both results
3048 /// would be assigned to the same register or stack slot, but it could mean
3049 /// the target performs adequate checks of its own before proceeding with the
3050 /// tail call. Targets must return false when FromTy <= ToTy.
3051 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
3052 return false;
3053 }
3054
3055 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
3056 virtual bool isTruncateFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const {
3057 return isTruncateFree(getApproximateEVTForLLT(FromTy, Ctx),
3058 getApproximateEVTForLLT(ToTy, Ctx));
3059 }
3060
3061 /// Return true if truncating the specific node Val to type VT2 is free.
3062 virtual bool isTruncateFree(SDValue Val, EVT VT2) const {
3063 // Fallback to type matching.
3064 return isTruncateFree(Val.getValueType(), VT2);
3065 }
3066
3067 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
3068
3069 /// Return true if the extension represented by \p I is free.
3070 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
3071 /// this method can use the context provided by \p I to decide
3072 /// whether or not \p I is free.
3073 /// This method extends the behavior of the is[Z|FP]ExtFree family.
3074 /// In other words, if is[Z|FP]ExtFree returns true, then this method
3075 /// returns true as well. The converse is not true.
3076 /// The target can perform the adequate checks by overriding isExtFreeImpl.
3077 /// \pre \p I must be a sign, zero, or fp extension.
3078 bool isExtFree(const Instruction *I) const {
3079 switch (I->getOpcode()) {
3080 case Instruction::FPExt:
3081 if (isFPExtFree(EVT::getEVT(I->getType()),
3082 EVT::getEVT(I->getOperand(0)->getType())))
3083 return true;
3084 break;
3085 case Instruction::ZExt:
3086 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
3087 return true;
3088 break;
3089 case Instruction::SExt:
3090 break;
3091 default:
3092 llvm_unreachable("Instruction is not an extension");
3093 }
3094 return isExtFreeImpl(I);
3095 }
3096
3097 /// Return true if \p Load and \p Ext can form an ExtLoad.
3098 /// For example, in AArch64
3099 /// %L = load i8, i8* %ptr
3100 /// %E = zext i8 %L to i32
3101 /// can be lowered into one load instruction
3102 /// ldrb w0, [x0]
3103 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
3104 const DataLayout &DL) const {
3105 EVT VT = getValueType(DL, Ext->getType());
3106 EVT LoadVT = getValueType(DL, Load->getType());
3107
3108 // If the load has other users and the truncate is not free, the ext
3109 // probably isn't free.
3110 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
3111 !isTruncateFree(Ext->getType(), Load->getType()))
3112 return false;
3113
3114 // Check whether the target supports casts folded into loads.
3115 unsigned LType;
3116 if (isa<ZExtInst>(Ext))
3117 LType = ISD::ZEXTLOAD;
3118 else {
3119 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
3120 LType = ISD::SEXTLOAD;
3121 }
3122
3123 return isLoadExtLegal(LType, VT, LoadVT);
3124 }
3125
3126 /// Return true if any actual instruction that defines a value of type FromTy
3127 /// implicitly zero-extends the value to ToTy in the result register.
3128 ///
3129 /// The function should return true when it is likely that the truncate can
3130 /// be freely folded with an instruction defining a value of FromTy. If
3131 /// the defining instruction is unknown (because you're looking at a
3132 /// function argument, PHI, etc.) then the target may require an
3133 /// explicit truncate, which is not necessarily free, but this function
3134 /// does not deal with those cases.
3135 /// Targets must return false when FromTy >= ToTy.
3136 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
3137 return false;
3138 }
3139
3140 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
3141 virtual bool isZExtFree(LLT FromTy, LLT ToTy, LLVMContext &Ctx) const {
3142 return isZExtFree(getApproximateEVTForLLT(FromTy, Ctx),
3143 getApproximateEVTForLLT(ToTy, Ctx));
3144 }
3145
3146 /// Return true if zero-extending the specific node Val to type VT2 is free
3147 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
3148 /// because it's folded such as X86 zero-extending loads).
3149 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
3150 return isZExtFree(Val.getValueType(), VT2);
3151 }
3152
3153 /// Return true if sign-extension from FromTy to ToTy is cheaper than
3154 /// zero-extension.
3155 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
3156 return false;
3157 }
3158
3159 /// Return true if this constant should be sign extended when promoting to
3160 /// a larger type.
3161 virtual bool signExtendConstant(const ConstantInt *C) const { return false; }
3162
3163 /// Try to optimize extending or truncating conversion instructions (like
3164 /// zext, trunc, fptoui, uitofp) for the target.
3165 virtual bool
3166 optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
3167 const TargetTransformInfo &TTI) const {
3168 return false;
3169 }
3170
3171 /// Return true if the target supplies and combines to a paired load
3172 /// two loaded values of type LoadedType next to each other in memory.
3173 /// RequiredAlignment gives the minimal alignment constraints that must be met
3174 /// to be able to select this paired load.
3175 ///
3176 /// This information is *not* used to generate actual paired loads, but it is
3177 /// used to generate a sequence of loads that is easier to combine into a
3178 /// paired load.
3179 /// For instance, something like this:
3180 /// a = load i64* addr
3181 /// b = trunc i64 a to i32
3182 /// c = lshr i64 a, 32
3183 /// d = trunc i64 c to i32
3184 /// will be optimized into:
3185 /// b = load i32* addr1
3186 /// d = load i32* addr2
3187 /// Where addr1 = addr2 +/- sizeof(i32).
3188 ///
3189 /// In other words, unless the target performs a post-isel load combining,
3190 /// this information should not be provided because it will generate more
3191 /// loads.
3192 virtual bool hasPairedLoad(EVT /*LoadedType*/,
3193 Align & /*RequiredAlignment*/) const {
3194 return false;
3195 }
3196
3197 /// Return true if the target has a vector blend instruction.
3198 virtual bool hasVectorBlend() const { return false; }
3199
3200 /// Get the maximum supported factor for interleaved memory accesses.
3201 /// Defaults to the minimum interleave factor: 2.
3202 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
3203
3204 /// Lower an interleaved load to target specific intrinsics. Return
3205 /// true on success.
3206 ///
3207 /// \p LI is the vector load instruction.
3208 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
3209 /// \p Indices is the corresponding indices for each shufflevector.
3210 /// \p Factor is the interleave factor.
3211 virtual bool lowerInterleavedLoad(LoadInst *LI,
3212 ArrayRef<ShuffleVectorInst *> Shuffles,
3213 ArrayRef<unsigned> Indices,
3214 unsigned Factor) const {
3215 return false;
3216 }
3217
3218 /// Lower an interleaved store to target specific intrinsics. Return
3219 /// true on success.
3220 ///
3221 /// \p SI is the vector store instruction.
3222 /// \p SVI is the shufflevector to RE-interleave the stored vector.
3223 /// \p Factor is the interleave factor.
3224 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
3225 unsigned Factor) const {
3226 return false;
3227 }
3228
3229 /// Lower an interleaved load to target specific intrinsics. Return
3230 /// true on success.
3231 ///
3232 /// \p Load is a vp.load instruction.
3233 /// \p Mask is a mask value
3234 /// \p DeinterleaveRes is a list of deinterleaved results.
3235 virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
3236 ArrayRef<Value *> DeinterleaveRes) const {
3237 return false;
3238 }
3239
3240 /// Lower an interleaved store to target specific intrinsics. Return
3241 /// true on success.
3242 ///
3243 /// \p Store is the vp.store instruction.
3244 /// \p Mask is a mask value
3245 /// \p InterleaveOps is a list of values being interleaved.
3246 virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
3247 ArrayRef<Value *> InterleaveOps) const {
3248 return false;
3249 }
3250
3251 /// Lower a deinterleave intrinsic to a target specific load intrinsic.
3252 /// Return true on success. Currently only supports
3253 /// llvm.vector.deinterleave{2,3,5,7}
3254 ///
3255 /// \p LI is the accompanying load instruction.
3256 /// \p DeinterleaveValues contains the deinterleaved values.
3257 virtual bool
3258 lowerDeinterleaveIntrinsicToLoad(LoadInst *LI,
3259 ArrayRef<Value *> DeinterleaveValues) const {
3260 return false;
3261 }
3262
3263 /// Lower an interleave intrinsic to a target specific store intrinsic.
3264 /// Return true on success. Currently only supports
3265 /// llvm.vector.interleave{2,3,5,7}
3266 ///
3267 /// \p SI is the accompanying store instruction
3268 /// \p InterleaveValues contains the interleaved values.
3269 virtual bool
3270 lowerInterleaveIntrinsicToStore(StoreInst *SI,
3271 ArrayRef<Value *> InterleaveValues) const {
3272 return false;
3273 }
3274
3275 /// Return true if an fpext operation is free (for instance, because
3276 /// single-precision floating-point numbers are implicitly extended to
3277 /// double-precision).
3278 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
3279 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
3280 "invalid fpext types");
3281 return false;
3282 }
3283
3284 /// Return true if an fpext operation input to an \p Opcode operation is free
3285 /// (for instance, because half-precision floating-point numbers are
3286 /// implicitly extended to single precision) for an FMA instruction.
3287 virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
3288 LLT DestTy, LLT SrcTy) const {
3289 return false;
3290 }
3291
3292 /// Return true if an fpext operation input to an \p Opcode operation is free
3293 /// (for instance, because half-precision floating-point numbers are
3294 /// implicitly extended to single precision) for an FMA instruction.
3295 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
3296 EVT DestVT, EVT SrcVT) const {
3297 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
3298 "invalid fpext types");
3299 return isFPExtFree(DestVT, SrcVT);
3300 }
3301
3302 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
3303 /// extend node) is profitable.
3304 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
3305
3306 /// Return true if an fneg operation is free to the point where it is never
3307 /// worthwhile to replace it with a bitwise operation.
3308 virtual bool isFNegFree(EVT VT) const {
3309 assert(VT.isFloatingPoint());
3310 return false;
3311 }
3312
3313 /// Return true if an fabs operation is free to the point where it is never
3314 /// worthwhile to replace it with a bitwise operation.
3315 virtual bool isFAbsFree(EVT VT) const {
3316 assert(VT.isFloatingPoint());
3317 return false;
3318 }
3319
3320 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3321 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3322 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3323 ///
3324 /// NOTE: This may be called before legalization on types for which FMAs are
3325 /// not legal, but should return true if those types will eventually legalize
3326 /// to types that support FMAs. After legalization, it will only be called on
3327 /// types that support FMAs (via Legal or Custom actions)
3328 ///
3329 /// Targets that care about soft float support should return false when soft
3330 /// float code is being generated (i.e. use-soft-float).
3331 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3332 EVT) const {
3333 return false;
3334 }
3335
3336 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3337 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3338 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3339 ///
3340 /// NOTE: This may be called before legalization on types for which FMAs are
3341 /// not legal, but should return true if those types will eventually legalize
3342 /// to types that support FMAs. After legalization, it will only be called on
3343 /// types that support FMAs (via Legal or Custom actions)
3344 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3345 LLT) const {
3346 return false;
3347 }
3348
3349 /// IR version
3350 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
3351 return false;
3352 }
3353
3354 /// Returns true if \p MI can be combined with another instruction to
3355 /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD,
3356 /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be
3357 /// distributed into an fadd/fsub.
3358 virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
3359 assert((MI.getOpcode() == TargetOpcode::G_FADD ||
3360 MI.getOpcode() == TargetOpcode::G_FSUB ||
3361 MI.getOpcode() == TargetOpcode::G_FMUL) &&
3362 "unexpected node in FMAD forming combine");
3363 switch (Ty.getScalarSizeInBits()) {
3364 case 16:
3365 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
3366 case 32:
3367 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
3368 case 64:
3369 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
3370 default:
3371 break;
3372 }
3373
3374 return false;
3375 }
3376
3377 /// Returns true if \p N can be combined with another node to form an
3378 /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
3379 /// will be distributed into an fadd/fsub.
3380 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
3381 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
3382 N->getOpcode() == ISD::FMUL) &&
3383 "unexpected node in FMAD forming combine");
3384 return isOperationLegal(ISD::FMAD, N->getValueType(0));
3385 }
3386
3387 // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
3388 // than FMUL and ADD is delegated to the machine combiner.
3389 virtual bool generateFMAsInMachineCombiner(EVT VT,
3390 CodeGenOptLevel OptLevel) const {
3391 return false;
3392 }
3393
3394 /// Return true if it's profitable to narrow operations of type SrcVT to
3395 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
3396 /// i32 to i16.
3397 virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const {
3398 return false;
3399 }
3400
3401 /// Return true if pulling a binary operation into a select with an identity
3402 /// constant is profitable. This is the inverse of an IR transform.
3403 /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
3404 virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
3405 unsigned SelectOpcode,
3406 SDValue X,
3407 SDValue Y) const {
3408 return false;
3409 }
3410
3411 /// Return true if it is beneficial to convert a load of a constant to
3412 /// just the constant itself.
3413 /// On some targets it might be more efficient to use a combination of
3414 /// arithmetic instructions to materialize the constant instead of loading it
3415 /// from a constant pool.
3416 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
3417 Type *Ty) const {
3418 return false;
3419 }
3420
3421 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
3422 /// from this source type with this index. This is needed because
3423 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
3424 /// the first element, and only the target knows which lowering is cheap.
3425 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
3426 unsigned Index) const {
3427 return false;
3428 }
3429
3430 /// Try to convert an extract element of a vector binary operation into an
3431 /// extract element followed by a scalar operation.
3432 virtual bool shouldScalarizeBinop(SDValue VecOp) const {
3433 return false;
3434 }
3435
3436 /// Return true if extraction of a scalar element from the given vector type
3437 /// at the given index is cheap. For example, if scalar operations occur on
3438 /// the same register file as vector operations, then an extract element may
3439 /// be a sub-register rename rather than an actual instruction.
3440 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
3441 return false;
3442 }
3443
3444 /// Try to convert math with an overflow comparison into the corresponding DAG
3445 /// node operation. Targets may want to override this independently of whether
3446 /// the operation is legal/custom for the given type because it may obscure
3447 /// matching of other patterns.
3448 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
3449 bool MathUsed) const {
3450 // TODO: The default logic is inherited from code in CodeGenPrepare.
3451 // The opcode should not make a difference by default?
3452 if (Opcode != ISD::UADDO)
3453 return false;
3454
3455 // Allow the transform as long as we have an integer type that is not
3456 // obviously illegal and unsupported and if the math result is used
3457 // besides the overflow check. On some targets (e.g. SPARC), it is
3458 // not profitable to form an overflow op if the math result has no
3459 // concrete users.
3460 if (VT.isVector())
3461 return false;
3462 return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
3463 }
3464
3465 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
3466 // even if the vector itself has multiple uses.
3467 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
3468 return false;
3469 }
3470
3471 // Return true if CodeGenPrepare should consider splitting a large offset of
3472 // a GEP so that the GEP fits into the addressing mode and can be sunk into
3473 // the same blocks as its users.
3474 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
3475
3476 /// Return true if creating a shift of the type by the given
3477 /// amount is not profitable.
3478 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
3479 return false;
3480 }
3481
3482 // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
3483 // A) where y has a single bit set?
3484 virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
3485 const APInt &AndMask) const {
3486 unsigned ShCt = AndMask.getBitWidth() - 1;
3487 return !shouldAvoidTransformToShift(VT, ShCt);
3488 }
3489
3490 /// Does this target require the clearing of high-order bits in a register
3491 /// passed to the fp16 to fp conversion library function?
3492 virtual bool shouldKeepZExtForFP16Conv() const { return false; }
3493
3494 /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
3495 /// from min(max(fptoi)) saturation patterns.
3496 virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
3497 return isOperationLegalOrCustom(Op, VT);
3498 }
3499
3500 /// Should we expand [US]CMP nodes using two selects and two compares, or by
3501 /// doing arithmetic on boolean types?
3502 virtual bool shouldExpandCmpUsingSelects(EVT VT) const { return false; }
3503
3504 /// True if the target has some particular form of dealing with pointer
3505 /// arithmetic semantics for pointers with the given value type. False if
3506 /// pointer arithmetic should not be preserved for passes such as instruction
3507 /// selection, and can fall back to regular arithmetic.
3508 /// This should be removed when PTRADD nodes are widely supported by backends.
3509 virtual bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const {
3510 return false;
3511 }
3512
3513 /// Does this target support complex deinterleaving?
3514 virtual bool isComplexDeinterleavingSupported() const { return false; }
3515
3516 /// Does this target support complex deinterleaving with the given operation
3517 /// and type?
3518 virtual bool isComplexDeinterleavingOperationSupported(
3519 ComplexDeinterleavingOperation Operation, Type *Ty) const {
3520 return false;
3521 }
3522
3523 // Get the preferred opcode for FP_TO_XINT nodes.
3524 // By default, this checks if the provided operation is an illegal FP_TO_UINT
3525 // and if so, checks if FP_TO_SINT is legal or custom for use as a
3526 // replacement. If both UINT and SINT conversions are Custom, we choose SINT
3527 // by default because that's the right thing on PPC.
3528 virtual unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
3529 EVT ToVT) const {
3530 if (isOperationLegal(Op, ToVT))
3531 return Op;
3532 switch (Op) {
3533 case ISD::FP_TO_UINT:
3534 if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT))
3535 return ISD::FP_TO_SINT;
3536 break;
3537 case ISD::STRICT_FP_TO_UINT:
3538 if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT))
3539 return ISD::STRICT_FP_TO_SINT;
3540 break;
3541 case ISD::VP_FP_TO_UINT:
3542 if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT))
3543 return ISD::VP_FP_TO_SINT;
3544 break;
3545 default:
3546 break;
3547 }
3548 return Op;
3549 }
3550
3551 /// Create the IR node for the given complex deinterleaving operation.
3552 /// If one cannot be created using all the given inputs, nullptr should be
3553 /// returned.
3554 virtual Value *createComplexDeinterleavingIR(
3555 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
3556 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
3557 Value *Accumulator = nullptr) const {
3558 return nullptr;
3559 }
3560
3561 /// Rename the default libcall routine name for the specified libcall.
3562 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
3563 Libcalls.setLibcallName(Call, Name);
3564 }
3565
3566 void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) {
3567 Libcalls.setLibcallName(Calls, Name);
3568 }
3569
3570 /// Get the libcall routine name for the specified libcall.
3571 const char *getLibcallName(RTLIB::Libcall Call) const {
3572 return Libcalls.getLibcallName(Call);
3573 }
3574
3575 /// Override the default CondCode to be used to test the result of the
3576 /// comparison libcall against zero.
3577 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
3578 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
3579 CmpLibcallCCs[Call] = CC;
3580 }
3581
3582
3583 /// Get the CondCode that's to be used to test the result of the comparison
3584 /// libcall against zero.
3585 /// FIXME: This can't be merged with 'RuntimeLibcallsInfo' because of the ISD.
3586 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
3587 return CmpLibcallCCs[Call];
3588 }
3589
3590
3591 /// Set the CallingConv that should be used for the specified libcall.
3592 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
3593 Libcalls.setLibcallCallingConv(Call, CC);
3594 }
3595
3596 /// Get the CallingConv that should be used for the specified libcall.
3597 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
3598 return Libcalls.getLibcallCallingConv(Call);
3599 }
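
// For illustration, a hypothetical target supplying its own single-precision
// add routine under a non-default calling convention might write (the routine
// name is made up):
//   setLibcallName(RTLIB::ADD_F32, "__my_addsf3");
//   setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::Fast);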
3600
3601 /// Execute target specific actions to finalize target lowering.
3602 /// This is used to set extra flags in MachineFrameInformation and freezing
3603 /// the set of reserved registers.
3604 /// The default implementation just freezes the set of reserved registers.
3605 virtual void finalizeLowering(MachineFunction &MF) const;
3606
3607 /// Returns true if it's profitable to allow merging stores of loads when
3608 /// there are function calls between the load and the store.
3609 virtual bool shouldMergeStoreOfLoadsOverCall(EVT, EVT) const { return true; }
3610
3611 //===----------------------------------------------------------------------===//
3612 // GlobalISel Hooks
3613 //===----------------------------------------------------------------------===//
3614 /// Check whether or not \p MI needs to be moved close to its uses.
3615 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
3616
3617
3618private:
3619 const TargetMachine &TM;
3620
3621 /// Tells the code generator that the target has multiple (allocatable)
3622 /// condition registers that can be used to store the results of comparisons
3623 /// for use by selects and conditional branches. With multiple condition
3624 /// registers, the code generator will not aggressively sink comparisons into
3625 /// the blocks of their users.
3626 bool HasMultipleConditionRegisters;
3627
3628 /// Tells the code generator that the target has BitExtract instructions.
3629 /// The code generator will aggressively sink "shift"s into the blocks of
3630 /// their users if the users will generate "and" instructions which can be
3631 /// combined with "shift" to BitExtract instructions.
3632 bool HasExtractBitsInsn;
3633
3634 /// Tells the code generator to bypass slow divide or remainder
3635 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
3636 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
3637 /// div/rem when the operands are positive and less than 256.
3638 DenseMap<unsigned int, unsigned int> BypassSlowDivWidths;
3639
3640 /// Tells the code generator that it shouldn't generate extra flow control
3641 /// instructions and should attempt to combine flow control instructions via
3642 /// predication.
3643 bool JumpIsExpensive;
3644
3645 /// Information about the contents of the high-bits in boolean values held in
3646 /// a type wider than i1. See getBooleanContents.
3647 BooleanContent BooleanContents;
3648
3649 /// Information about the contents of the high-bits in boolean values held in
3650 /// a type wider than i1. See getBooleanContents.
3651 BooleanContent BooleanFloatContents;
3652
3653 /// Information about the contents of the high-bits in boolean vector values
3654 /// when the element type is wider than i1. See getBooleanContents.
3655 BooleanContent BooleanVectorContents;
3656
3657 /// The target scheduling preference: shortest possible total cycles or lowest
3658 /// register usage.
3659 Sched::Preference SchedPreferenceInfo;
3660
3661 /// The minimum alignment that any argument on the stack needs to have.
3662 Align MinStackArgumentAlignment;
3663
3664 /// The minimum function alignment (used when optimizing for size, and to
3665 /// prevent explicitly provided alignment from leading to incorrect code).
3666 Align MinFunctionAlignment;
3667
3668 /// The preferred function alignment (used when alignment unspecified and
3669 /// optimizing for speed).
3670 Align PrefFunctionAlignment;
3671
3672 /// The preferred loop alignment, in bytes (not as a log2 value).
3673 Align PrefLoopAlignment;
3674 /// The maximum amount of bytes permitted to be emitted for alignment.
3675 unsigned MaxBytesForAlignment;
3676
3677 /// Size in bits of the maximum atomics size the backend supports.
3678 /// Accesses larger than this will be expanded by AtomicExpandPass.
3679 unsigned MaxAtomicSizeInBitsSupported;
3680
3681 /// Size in bits of the maximum div/rem size the backend supports.
3682 /// Larger operations will be expanded by ExpandLargeDivRem.
3683 unsigned MaxDivRemBitWidthSupported;
3684
3685 /// Size in bits of the maximum fp to/from int conversion size the
3686 /// backend supports. Larger operations will be expanded by
3687 /// ExpandFp.
3688 unsigned MaxLargeFPConvertBitWidthSupported;
3689
3690 /// Size in bits of the minimum cmpxchg or ll/sc operation the
3691 /// backend supports.
3692 unsigned MinCmpXchgSizeInBits;
3693
3694 /// This indicates if the target supports unaligned atomic operations.
3695 bool SupportsUnalignedAtomics;
3696
3697 /// If set to a physical register, this specifies the register that
3698 /// llvm.stacksave/llvm.stackrestore should save and restore.
3699 Register StackPointerRegisterToSaveRestore;
3700
3701 /// This indicates the default register class to use for each ValueType the
3702 /// target supports natively.
3703 const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE];
3704 uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE];
3705 MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE];
3706
3707 /// This indicates the "representative" register class to use for each
3708 /// ValueType the target supports natively. This information is used by the
3709 /// scheduler to track register pressure. By default, the representative
3710 /// register class is the largest legal super-reg register class of the
3711 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
3712 /// representative class would be GR32.
3713 const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0};
3714
3715 /// This indicates the "cost" of the "representative" register class for each
3716 /// ValueType. The cost is used by the scheduler to approximate register
3717 /// pressure.
3718 uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE];
3719
3720 /// For any value types we are promoting or expanding, this contains the value
3721 /// type that we are changing to. For Expanded types, this contains one step
3722 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
3723 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
3724 /// the same type (e.g. i32 -> i32).
3725 MVT TransformToType[MVT::VALUETYPE_SIZE];
3726
3727 /// For each operation and each value type, keep a LegalizeAction that
3728 /// indicates how instruction selection should deal with the operation. Most
3729 /// operations are Legal (aka, supported natively by the target), but
3730 /// operations that are not should be described. Note that operations on
3731 /// non-legal value types are not described here.
3732 LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END];
3733
3734 /// For each load extension type and each value type, keep a LegalizeAction
3735 /// that indicates how instruction selection should deal with a load of a
3736 /// specific value type and extension type. Uses 4-bits to store the action
3737 /// for each of the 4 load ext types.
3738 uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3739
3740 /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand
3741 /// (default) values are supported.
3742 uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3743
3744 /// For each value type pair keep a LegalizeAction that indicates whether a
3745 /// truncating store of a specific value type and truncating type is legal.
3746 LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3747
3748 /// For each indexed mode and each value type, keep a quad of LegalizeAction
3749 /// that indicates how instruction selection should deal with the load /
3750 /// store / maskedload / maskedstore.
3751 ///
3752 /// The first dimension is the value_type for the reference. The second
3753 /// dimension represents the various modes for load store.
3754 uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE];
3755
3756 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
3757 /// indicates how instruction selection should deal with the condition code.
3758 ///
3759 /// Because each CC action takes up 4 bits, we need to have the array size be
3760 /// large enough to fit all of the value types. This can be done by rounding
3761 /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8.
3762 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8];
3763
3764 using PartialReduceActionTypes =
3765 std::tuple<unsigned, MVT::SimpleValueType, MVT::SimpleValueType>;
3766 /// For each partial reduce opcode, result type and input type combination,
3767 /// keep a LegalizeAction which indicates how instruction selection should
3768 /// deal with this operation.
3769 DenseMap<PartialReduceActionTypes, LegalizeAction> PartialReduceMLAActions;
3770
3771 ValueTypeActionImpl ValueTypeActions;
3772
3773private:
3774 /// Targets can specify ISD nodes that they would like PerformDAGCombine
3775 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
3776 /// array.
3777 unsigned char
3778 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
3779
3780 /// For operations that must be promoted to a specific type, this holds the
3781 /// destination type. This map should be sparse, so don't hold it as an
3782 /// array.
3783 ///
3784 /// Targets add entries to this map with AddPromotedToType(..), clients access
3785 /// this with getTypeToPromoteTo(..).
3786 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
3787 PromoteToType;
3788
3789 /// The list of libcalls that the target will use.
3790 RTLIB::RuntimeLibcallsInfo Libcalls;
3791
3792 /// The ISD::CondCode that should be used to test the result of each of the
3793 /// comparison libcall against zero.
3794 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
3795
3796 /// The bits of IndexedModeActions used to store the legalisation actions.
3797 /// We store the data as | ML | MS | L | S |, each field taking 4 bits.
3798 enum IndexedModeActionsBits {
3799 IMAB_Store = 0,
3800 IMAB_Load = 4,
3801 IMAB_MaskedStore = 8,
3802 IMAB_MaskedLoad = 12
3803 };
3804
3805 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
3806 LegalizeAction Action) {
3807 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
3808 (unsigned)Action < 0xf && "Table isn't big enough!");
3809 unsigned Ty = (unsigned)VT.SimpleTy;
3810 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
3811 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
3812 }
3813
3814 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
3815 unsigned Shift) const {
3816 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
3817 "Table isn't big enough!");
3818 unsigned Ty = (unsigned)VT.SimpleTy;
3819 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
3820 }
3821
3822protected:
3823 /// Return true if the extension represented by \p I is free.
3824 /// \pre \p I is a sign, zero, or fp extension and
3825 /// is[Z|FP]ExtFree of the related types is not true.
3826 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
3827
3828 /// Depth that GatherAllAliases should continue looking for chain
3829 /// dependencies when trying to find a more preferable chain. As an
3830 /// approximation, this should be more than the number of consecutive stores
3831 /// expected to be merged.
3832 unsigned GatherAllAliasesMaxDepth;
3833
3834 /// \brief Specify maximum number of store instructions per memset call.
3835 ///
3836 /// When lowering \@llvm.memset this field specifies the maximum number of
3837 /// store operations that may be substituted for the call to memset. Targets
3838 /// must set this value based on the cost threshold for that target. Targets
3839 /// should assume that the memset will be done using as many of the largest
3840 /// store operations first, followed by smaller ones, if necessary, per
3841 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
3842 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
3843 /// store. This only applies to setting a constant array of a constant size.
3844 unsigned MaxStoresPerMemset;
3845 /// Likewise for functions with the OptSize attribute.
3846 unsigned MaxStoresPerMemsetOptSize;
3847
3848 /// \brief Specify maximum number of store instructions per memcpy call.
3849 ///
3850 /// When lowering \@llvm.memcpy this field specifies the maximum number of
3851 /// store operations that may be substituted for a call to memcpy. Targets
3852 /// must set this value based on the cost threshold for that target. Targets
3853 /// should assume that the memcpy will be done using as many of the largest
3854 /// store operations first, followed by smaller ones, if necessary, per
3855 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
3856 /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
3857 /// and one 1-byte store. This only applies to copying a constant array of
3858 /// constant size.
3859 unsigned MaxStoresPerMemcpy;
3860 /// Likewise for functions with the OptSize attribute.
3861 unsigned MaxStoresPerMemcpyOptSize;
3862 /// \brief Specify max number of store instructions to glue in inlined memcpy.
3863 ///
3864 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
3865 /// of store instructions to keep together. This helps in pairing and
3866 /// vectorization later on.
3867 unsigned MaxGluedStoresPerMemcpy = 0;
3868
3869 /// \brief Specify maximum number of load instructions per memcmp call.
3870 ///
3871 /// When lowering \@llvm.memcmp this field specifies the maximum number of
3872 /// pairs of load operations that may be substituted for a call to memcmp.
3873 /// Targets must set this value based on the cost threshold for that target.
3874 /// Targets should assume that the memcmp will be done using as many of the
3875 /// largest load operations first, followed by smaller ones, if necessary, per
3876 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
3877 /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
3878 /// and one 1-byte load. This only applies to comparing a constant array of
3879 /// constant size.
3880 unsigned MaxLoadsPerMemcmp;
3881 /// Likewise for functions with the OptSize attribute.
3882 unsigned MaxLoadsPerMemcmpOptSize;
3883
3884 /// \brief Specify maximum number of store instructions per memmove call.
3885 ///
3886 /// When lowering \@llvm.memmove this field specifies the maximum number of
3887 /// store instructions that may be substituted for a call to memmove. Targets
3888 /// must set this value based on the cost threshold for that target. Targets
3889 /// should assume that the memmove will be done using as many of the largest
3890 /// store operations first, followed by smaller ones, if necessary, per
3891 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
3892 /// with 8-bit alignment would result in nine 1-byte stores. This only
3893 /// applies to copying a constant array of constant size.
3894 unsigned MaxStoresPerMemmove;
3895 /// Likewise for functions with the OptSize attribute.
3896 unsigned MaxStoresPerMemmoveOptSize;
3897
3898 /// Tells the code generator that select is more expensive than a branch if
3899 /// the branch is usually predicted right.
3900 bool PredictableSelectIsExpensive;
3901
3902 /// \see enableExtLdPromotion.
3903 bool EnableExtLdPromotion;
3904
3905 /// Return true if the value types that can be represented by the specified
3906 /// register class are all legal.
3907 bool isLegalRC(const TargetRegisterInfo &TRI,
3908 const TargetRegisterClass &RC) const;
3909
3910 /// Replace/modify any TargetFrameIndex operands with a target-dependent
3911 /// sequence of memory operands that is recognized by PrologEpilogInserter.
3912 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
3913 MachineBasicBlock *MBB) const;
3914
3915 bool IsStrictFPEnabled;
3916};
3917
3918/// This class defines information used to lower LLVM code to legal SelectionDAG
3919/// operators that the target instruction selector can accept natively.
3920///
3921/// This class also defines callbacks that targets must implement to lower
3922/// target-specific constructs to SelectionDAG operators.
3923class LLVM_ABI TargetLowering : public TargetLoweringBase {
3924public:
3925 struct DAGCombinerInfo;
3926 struct MakeLibCallOptions;
3927
3928 TargetLowering(const TargetLowering &) = delete;
3929 TargetLowering &operator=(const TargetLowering &) = delete;
3930
3931 explicit TargetLowering(const TargetMachine &TM);
3932 ~TargetLowering() override;
3933
3934 bool isPositionIndependent() const;
3935
3936 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
3937 FunctionLoweringInfo *FLI,
3938 UniformityInfo *UA) const {
3939 return false;
3940 }
3941
3942 // Lets the target control the following reassociation of operands:
3943 // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is y.
3944 // By default, any case where N0 has a single use is considered profitable.
3945 // This behavior reflects the condition replaced by this target hook call in
3946 // the DAGCombiner. Any particular target can implement its own heuristic to
3947 // restrict the common combiner.
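// For illustration only, a hypothetical target could keep the default
// behavior for scalars but refuse to reassociate vector operations:
//   bool MyTargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
//                                              SDValue N1) const {
//     if (N0.getValueType().isVector()) // hypothetical restriction
//       return false;
//     return TargetLowering::isReassocProfitable(DAG, N0, N1);
//   }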
3948 virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
3949 SDValue N1) const {
3950 return N0.hasOneUse();
3951 }
3952
3953 // Lets the target control the following reassociation of operands:
3954 // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is y.
3955 // By default, any case where N0 has a single use is considered profitable.
3956 // This behavior reflects the condition replaced by this target hook call in
3957 // the combiner. Any particular target can implement its own heuristic to
3958 // restrict the common combiner.
3959 virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
3960 Register N1) const {
3961 return MRI.hasOneNonDBGUse(N0);
3962 }
3963
3964 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
3965 return false;
3966 }
3967
3968 /// Returns true if the node's address can be legally represented as a
3969 /// pre-indexed load / store address, returning the base pointer, the offset,
3970 /// and the addressing mode by reference.
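///
/// A minimal sketch, assuming a hypothetical target whose loads support a
/// pre-increment form with a constant offset (a real implementation must also
/// handle stores and check the offset range and legality):
/// \code
///   bool MyTargetLowering::getPreIndexedAddressParts(
///       SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
///       SelectionDAG &DAG) const {
///     auto *LD = dyn_cast<LoadSDNode>(N);
///     if (!LD)
///       return false;
///     SDValue Addr = LD->getBasePtr();
///     if (Addr.getOpcode() != ISD::ADD ||
///         !isa<ConstantSDNode>(Addr.getOperand(1)))
///       return false;
///     Base = Addr.getOperand(0);
///     Offset = Addr.getOperand(1);
///     AM = ISD::PRE_INC;
///     return true;
///   }
/// \endcode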
3971 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
3972 SDValue &/*Offset*/,
3973 ISD::MemIndexedMode &/*AM*/,
3974 SelectionDAG &/*DAG*/) const {
3975 return false;
3976 }
3977
3978 /// Returns true if this node can be combined with a load / store to form a
3979 /// post-indexed load / store, returning the base pointer, the offset, and the
3980 /// addressing mode by reference.
3981 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
3982 SDValue &/*Base*/,
3983 SDValue &/*Offset*/,
3984 ISD::MemIndexedMode &/*AM*/,
3985 SelectionDAG &/*DAG*/) const {
3986 return false;
3987 }
3988
3989 /// Returns true if the specified base+offset is a legal indexed addressing
3990 /// mode for this target. \p MI is the load or store instruction that is being
3991 /// considered for transformation.
3992 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
3993 bool IsPre, MachineRegisterInfo &MRI) const {
3994 return false;
3995 }
3996
3997 /// Return the entry encoding for a jump table in the current function. The
3998 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
3999 virtual unsigned getJumpTableEncoding() const;
4000
4001 virtual MVT getJumpTableRegTy(const DataLayout &DL) const {
4002 return getPointerTy(DL);
4003 }
4004
4005 virtual const MCExpr *
4006 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
4007 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
4008 MCContext &/*Ctx*/) const {
4009 llvm_unreachable("Need to implement this hook if target has custom JTIs");
4010 }
4011
4012 /// Returns relocation base for the given PIC jumptable.
4013 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
4014 SelectionDAG &DAG) const;
4015
4016 /// This returns the relocation base for the given PIC jumptable, the same as
4017 /// getPICJumpTableRelocBase, but as an MCExpr.
4018 virtual const MCExpr *
4019 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
4020 unsigned JTI, MCContext &Ctx) const;
4021
4022 /// Return true if folding a constant offset with the given GlobalAddress is
4023 /// legal. It is frequently not legal in PIC relocation models.
4024 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
4025
4026 /// On x86, return true if the operand with index OpNo is a CALL or JUMP
4027 /// instruction, which can use either a memory constraint or an address
4028 /// constraint. -fasm-blocks "__asm call foo" lowers to
4029 /// call void asm sideeffect inteldialect "call ${0:P}", "*m..."
4030 ///
4031 /// This function is used by a hack to choose the address constraint,
4032 /// lowering to a direct call.
4033 virtual bool
4034 isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
4035 unsigned OpNo) const {
4036 return false;
4037 }
4038
4039 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
4040 SDValue &Chain) const;
4041
4042 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
4043 SDValue &NewRHS, ISD::CondCode &CCCode,
4044 const SDLoc &DL, const SDValue OldLHS,
4045 const SDValue OldRHS) const;
4046
4047 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
4048 SDValue &NewRHS, ISD::CondCode &CCCode,
4049 const SDLoc &DL, const SDValue OldLHS,
4050 const SDValue OldRHS, SDValue &Chain,
4051 bool IsSignaling = false) const;
4052
4053 virtual SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL,
4054 SDValue Chain, MachineMemOperand *MMO,
4055 SDValue &NewLoad, SDValue Ptr,
4056 SDValue PassThru, SDValue Mask) const {
4057 llvm_unreachable("Not Implemented");
4058 }
4059
4060 virtual SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
4061 SDValue Chain, MachineMemOperand *MMO,
4062 SDValue Ptr, SDValue Val,
4063 SDValue Mask) const {
4064 llvm_unreachable("Not Implemented");
4065 }
4066
4067 /// Returns a pair of (return value, chain).
4068 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
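///
/// A minimal sketch, assuming \p Op is a binary f32 node being softened into a
/// call to the f32 addition libcall:
/// \code
///   SDValue Ops[2] = {Op.getOperand(0), Op.getOperand(1)};
///   TargetLowering::MakeLibCallOptions CallOptions;
///   SDLoc dl(Op);
///   std::pair<SDValue, SDValue> R =
///       makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
///   SDValue Result = R.first; // The chain is in R.second if it is needed.
/// \endcode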
4069 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
4070 EVT RetVT, ArrayRef<SDValue> Ops,
4071 MakeLibCallOptions CallOptions,
4072 const SDLoc &dl,
4073 SDValue Chain = SDValue()) const;
4074
4075 /// Check whether parameters to a call that are passed in callee saved
4076 /// registers are the same as from the calling function. This needs to be
4077 /// checked for tail call eligibility.
4078 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
4079 const uint32_t *CallerPreservedMask,
4080 const SmallVectorImpl<CCValAssign> &ArgLocs,
4081 const SmallVectorImpl<SDValue> &OutVals) const;
4082
4083 //===--------------------------------------------------------------------===//
4084 // TargetLowering Optimization Methods
4085 //
4086
4087 /// A convenience struct that encapsulates a DAG, and two SDValues for
4088 /// returning information from TargetLowering to its clients that want to
4089 /// combine.
4090 struct TargetLoweringOpt {
4091 SelectionDAG &DAG;
4092 bool LegalTys;
4093 bool LegalOps;
4094 SDValue Old;
4095 SDValue New;
4096
4097 explicit TargetLoweringOpt(SelectionDAG &InDAG,
4098 bool LT, bool LO) :
4099 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
4100
4101 bool LegalTypes() const { return LegalTys; }
4102 bool LegalOperations() const { return LegalOps; }
4103
4104 bool CombineTo(SDValue O, SDValue N) {
4105 Old = O;
4106 New = N;
4107 return true;
4108 }
4109 };
4110
4111 /// Determines the optimal series of memory ops to replace the memset / memcpy.
4112 /// Return true if the number of memory ops is below the threshold (Limit).
4113 /// Note that this is always the case when Limit is ~0.
4114 /// It returns the types of the sequence of memory ops to perform
4115 /// memset / memcpy by reference.
4116 virtual bool
4117 findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
4118 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
4119 const AttributeList &FuncAttributes) const;
4120
4121 /// Check to see if the specified operand of the specified instruction is a
4122 /// constant integer. If so, check to see if there are any bits set in the
4123 /// constant that are not demanded. If so, shrink the constant and return
4124 /// true.
4125 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
4126 const APInt &DemandedElts,
4127 TargetLoweringOpt &TLO) const;
4128
4129 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
4130 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
4131 TargetLoweringOpt &TLO) const;
4132
4133 // Target hook to do target-specific constant optimization, which is called by
4134 // ShrinkDemandedConstant. This function should return true if the target
4135 // doesn't want ShrinkDemandedConstant to further optimize the constant.
4136 virtual bool targetShrinkDemandedConstant(SDValue Op,
4137 const APInt &DemandedBits,
4138 const APInt &DemandedElts,
4139 TargetLoweringOpt &TLO) const {
4140 return false;
4141 }
4142
4143 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
4144 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
4145 /// but it could be generalized for targets with other types of implicit
4146 /// widening casts.
4147 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
4148 const APInt &DemandedBits,
4149 TargetLoweringOpt &TLO) const;
4150
4151 /// Look at Op. At this point, we know that only the DemandedBits bits of the
4152 /// result of Op are ever used downstream. If we can use this information to
4153 /// simplify Op, create a new simplified DAG node and return true, returning
4154 /// the original and new nodes in Old and New. Otherwise, analyze the
4155 /// expression and return a mask of KnownOne and KnownZero bits for the
4156 /// expression (used to simplify the caller). The KnownZero/One bits may only
4157 /// be accurate for those bits in the Demanded masks.
4158 /// \p AssumeSingleUse When this parameter is true, this function will
4159 /// attempt to simplify \p Op even if there are multiple uses.
4160 /// Callers are responsible for correctly updating the DAG based on the
4161 /// results of this function, because simply replacing TLO.Old
4162 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
4163 /// has multiple uses.
4164 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4165 const APInt &DemandedElts, KnownBits &Known,
4166 TargetLoweringOpt &TLO, unsigned Depth = 0,
4167 bool AssumeSingleUse = false) const;
4168
4169 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
4170 /// Adds Op back to the worklist upon success.
4171 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4172 KnownBits &Known, TargetLoweringOpt &TLO,
4173 unsigned Depth = 0,
4174 bool AssumeSingleUse = false) const;
4175
4176 /// Helper wrapper around SimplifyDemandedBits.
4177 /// Adds Op back to the worklist upon success.
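///
/// For illustration, from a hypothetical target combine where only the low
/// 8 bits of a 32-bit operand are ever consumed:
/// \code
///   APInt Demanded = APInt::getLowBitsSet(32, 8);
///   if (SimplifyDemandedBits(N->getOperand(0), Demanded, DCI))
///     return SDValue(N, 0);
/// \endcode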
4178 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4179 DAGCombinerInfo &DCI) const;
4180
4181 /// Helper wrapper around SimplifyDemandedBits.
4182 /// Adds Op back to the worklist upon success.
4183 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4184 const APInt &DemandedElts,
4185 DAGCombinerInfo &DCI) const;
4186
4187 /// More limited version of SimplifyDemandedBits that can be used to "look
4188 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4189 /// bitwise ops etc.
4190 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4191 const APInt &DemandedElts,
4192 SelectionDAG &DAG,
4193 unsigned Depth = 0) const;
4194
4195 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4196 /// elements.
4197 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4198 SelectionDAG &DAG,
4199 unsigned Depth = 0) const;
4200
4201 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4202 /// bits from only some vector elements.
4203 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
4204 const APInt &DemandedElts,
4205 SelectionDAG &DAG,
4206 unsigned Depth = 0) const;
4207
4208 /// Look at Vector Op. At this point, we know that only the DemandedElts
4209 /// elements of the result of Op are ever used downstream. If we can use
4210 /// this information to simplify Op, create a new simplified DAG node and
4211 /// return true, storing the original and new nodes in TLO.
4212 /// Otherwise, analyze the expression and return a mask of KnownUndef and
4213 /// KnownZero elements for the expression (used to simplify the caller).
4214 /// The KnownUndef/Zero elements may only be accurate for those bits
4215 /// in the DemandedMask.
4216 /// \p AssumeSingleUse When this parameter is true, this function will
4217 /// attempt to simplify \p Op even if there are multiple uses.
4218 /// Callers are responsible for correctly updating the DAG based on the
4219 /// results of this function, because simply replacing TLO.Old
4220 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
4221 /// has multiple uses.
4222 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
4223 APInt &KnownUndef, APInt &KnownZero,
4224 TargetLoweringOpt &TLO, unsigned Depth = 0,
4225 bool AssumeSingleUse = false) const;
4226
4227 /// Helper wrapper around SimplifyDemandedVectorElts.
4228 /// Adds Op back to the worklist upon success.
4229 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
4230 DAGCombinerInfo &DCI) const;
4231
4232 /// Return true if the target supports simplifying demanded vector elements by
4233 /// converting them to undefs.
4234 virtual bool
4235 shouldSimplifyDemandedVectorElts(SDValue Op,
4236 const TargetLoweringOpt &TLO) const {
4237 return true;
4238 }
4239
4240 /// Determine which of the bits specified in Mask are known to be either zero
4241 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4242 /// argument allows us to only collect the known bits that are shared by the
4243 /// requested vector elements.
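///
/// A minimal sketch for a hypothetical target node whose i32 result always has
/// its top 16 bits cleared (the opcode is invented for illustration):
/// \code
///   void MyTargetLowering::computeKnownBitsForTargetNode(
///       const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
///       const SelectionDAG &DAG, unsigned Depth) const {
///     if (Op.getOpcode() == MyTargetISD::ZERO_HI16)
///       Known.Zero.setHighBits(16);
///   }
/// \endcode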
4244 virtual void computeKnownBitsForTargetNode(const SDValue Op,
4245 KnownBits &Known,
4246 const APInt &DemandedElts,
4247 const SelectionDAG &DAG,
4248 unsigned Depth = 0) const;
4249
4250 /// Determine which of the bits specified in Mask are known to be either zero
4251 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4252 /// argument allows us to only collect the known bits that are shared by the
4253 /// requested vector elements. This is for GISel.
4254 virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis,
4255 Register R, KnownBits &Known,
4256 const APInt &DemandedElts,
4257 const MachineRegisterInfo &MRI,
4258 unsigned Depth = 0) const;
4259
4260 virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis,
4261 Register R,
4262 KnownFPClass &Known,
4263 const APInt &DemandedElts,
4264 const MachineRegisterInfo &MRI,
4265 unsigned Depth = 0) const;
4266
4267 /// Determine the known alignment for the pointer value \p R. This can
4268 /// typically be inferred from the number of low known 0 bits. However, for a
4269 /// pointer with a non-integral address space, the alignment value may be
4270 /// independent from the known low bits.
4271 virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis,
4272 Register R,
4273 const MachineRegisterInfo &MRI,
4274 unsigned Depth = 0) const;
4275
4276 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
4277 /// Default implementation computes low bits based on alignment
4278 /// information. This should preserve known bits passed into it.
4279 virtual void computeKnownBitsForFrameIndex(int FIOp,
4280 KnownBits &Known,
4281 const MachineFunction &MF) const;
4282
4283 /// This method can be implemented by targets that want to expose additional
4284 /// information about sign bits to the DAG Combiner. The DemandedElts
4285 /// argument allows us to only collect the minimum sign bits that are shared
4286 /// by the requested vector elements.
4287 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
4288 const APInt &DemandedElts,
4289 const SelectionDAG &DAG,
4290 unsigned Depth = 0) const;
4291
4292 /// This method can be implemented by targets that want to expose additional
4293 /// information about sign bits to GlobalISel combiners. The DemandedElts
4294 /// argument allows us to only collect the minimum sign bits that are shared
4295 /// by the requested vector elements.
4296 virtual unsigned computeNumSignBitsForTargetInstr(
4297 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
4298 const MachineRegisterInfo &MRI, unsigned Depth = 0) const;
4299
4300 /// Attempt to simplify any target nodes based on the demanded vector
4301 /// elements, returning true on success. Otherwise, analyze the expression and
4302 /// return a mask of KnownUndef and KnownZero elements for the expression
4303 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
4304 /// accurate for those bits in the DemandedMask.
4305 virtual bool SimplifyDemandedVectorEltsForTargetNode(
4306 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
4307 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
4308
4309 /// Attempt to simplify any target nodes based on the demanded bits/elts,
4310 /// returning true on success. Otherwise, analyze the
4311 /// expression and return a mask of KnownOne and KnownZero bits for the
4312 /// expression (used to simplify the caller). The KnownZero/One bits may only
4313 /// be accurate for those bits in the Demanded masks.
4314 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
4315 const APInt &DemandedBits,
4316 const APInt &DemandedElts,
4317 KnownBits &Known,
4318 TargetLoweringOpt &TLO,
4319 unsigned Depth = 0) const;
4320
4321 /// More limited version of SimplifyDemandedBits that can be used to "look
4322 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4323 /// bitwise ops etc.
4324 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
4325 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4326 SelectionDAG &DAG, unsigned Depth) const;
4327
4328 /// Return true if this function can prove that \p Op is never poison
4329 /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
4330 /// argument limits the check to the requested vector elements.
4331 virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4332 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4333 bool PoisonOnly, unsigned Depth) const;
4334
4335 /// Return true if Op can create undef or poison from non-undef & non-poison
4336 /// operands. The DemandedElts argument limits the check to the requested
4337 /// vector elements.
4338 virtual bool
4339 canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
4340 const SelectionDAG &DAG, bool PoisonOnly,
4341 bool ConsiderFlags, unsigned Depth) const;
4342
4343 /// Tries to build a legal vector shuffle using the provided parameters
4344 /// or equivalent variations. The Mask argument may be modified as the
4345 /// function tries different variations.
4346 /// Returns an empty SDValue if the operation fails.
4347 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4348 SDValue N1, MutableArrayRef<int> Mask,
4349 SelectionDAG &DAG) const;
4350
4351 /// This method returns the constant pool value that will be loaded by LD.
4352 /// NOTE: You must check for implicit extensions of the constant by LD.
4353 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
4354
4355 /// If \p SNaN is false, \returns true if \p Op is known to never be any
4356 /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a
4357 /// signaling NaN.
4358 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
4359 const APInt &DemandedElts,
4360 const SelectionDAG &DAG,
4361 bool SNaN = false,
4362 unsigned Depth = 0) const;
4363
4364 /// Return true if vector \p Op has the same value across all \p DemandedElts,
4365 /// indicating any elements which may be undef in the output \p UndefElts.
4366 virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
4367 APInt &UndefElts,
4368 const SelectionDAG &DAG,
4369 unsigned Depth = 0) const;
4370
4371 /// Returns true if the given \p Op is considered a canonical constant for the
4372 /// target, which should not be transformed back into a BUILD_VECTOR.
4373 virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
4374 return Op.getOpcode() == ISD::SPLAT_VECTOR ||
4375 Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS;
4376 }
4377
4378 struct DAGCombinerInfo {
4379 void *DC; // The DAG Combiner object.
4380 CombineLevel Level;
4381 bool CalledByLegalizer;
4382
4383 public:
4384 SelectionDAG &DAG;
4385
4386 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
4387 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
4388
4389 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
4390 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
4391 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
4392 CombineLevel getDAGCombineLevel() { return Level; }
4393 bool isCalledByLegalizer() const { return CalledByLegalizer; }
4394
4395 LLVM_ABI void AddToWorklist(SDNode *N);
4396 LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To,
4397 bool AddTo = true);
4398 LLVM_ABI SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
4399 LLVM_ABI SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
4400 bool AddTo = true);
4401
4402 LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N);
4403
4404 LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
4405 };
4406
4407 /// Return true if N is a constant or constant vector equal to the true value
4408 /// from getBooleanContents().
4409 bool isConstTrueVal(SDValue N) const;
4410
4411 /// Return true if N is a constant or constant vector equal to the false value
4412 /// from getBooleanContents().
4413 bool isConstFalseVal(SDValue N) const;
4414
4415 /// Return if \p N is a True value when extended to \p VT.
4416 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
4417
4418 /// Try to simplify a setcc built with the specified operands and cc. If it is
4419 /// unable to simplify it, return a null SDValue.
4420 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4421 bool foldBooleans, DAGCombinerInfo &DCI,
4422 const SDLoc &dl) const;
4423
4424 // For targets which wrap addresses, unwrap for analysis.
4425 virtual SDValue unwrapAddress(SDValue N) const { return N; }
4426
4427 /// Returns true (and the GlobalValue and the offset) if the node is a
4428 /// GlobalAddress + offset.
4429 virtual bool
4430 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
4431
4432 /// This method will be invoked for all target nodes and for any
4433 /// target-independent nodes that the target has registered (via
4434 /// setTargetDAGCombine) to have this hook invoked for.
4435 ///
4436 /// The semantics are as follows:
4437 /// Return Value:
4438 /// SDValue.Val == 0 - No change was made
4439 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
4440 /// otherwise - N should be replaced by the returned Operand.
4441 ///
4442 /// In addition, methods provided by DAGCombinerInfo may be used to perform
4443 /// more complex transformations.
4444 ///
4445 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
4446
4447 /// Return true if it is profitable to move this shift by a constant amount
4448 /// through its operand, adjusting any immediate operands as necessary to
4449 /// preserve semantics. This transformation may not be desirable if it
4450 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
4451 /// extraction in AArch64). By default, it returns true.
4452 ///
4453 /// @param N the shift node
4454 /// @param Level the current DAGCombine legalization level.
4455 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
4456 CombineLevel Level) const {
4457 SDValue ShiftLHS = N->getOperand(0);
4458 if (!ShiftLHS->hasOneUse())
4459 return false;
4460 if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
4461 !ShiftLHS.getOperand(0)->hasOneUse())
4462 return false;
4463 return true;
4464 }
4465
4466 /// GlobalISel - return true if it is profitable to move this shift by a
4467 /// constant amount through its operand, adjusting any immediate operands as
4468 /// necessary to preserve semantics. This transformation may not be desirable
4469 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4470 /// bitfield extraction in AArch64). By default, it returns true.
4471 ///
4472 /// @param MI the shift instruction
4473 /// @param IsAfterLegal true if running after legalization.
4474 virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
4475 bool IsAfterLegal) const {
4476 return true;
4477 }
4478
4479 /// GlobalISel - return true if it's profitable to perform the combine:
4480 /// shl ([sza]ext x), y => zext (shl x, y)
4481 virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
4482 return true;
4483 }
4484
4485 // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
4486 // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
4487 // writing this) is:
4488 // With C as a power of 2 and C != 0 and C != INT_MIN:
4489 // AddAnd:
4490 // (icmp eq A, C) | (icmp eq A, -C)
4491 // -> (icmp eq and(add(A, C), ~(C + C)), 0)
4492 // (icmp ne A, C) & (icmp ne A, -C)
4493 // -> (icmp ne and(add(A, C), ~(C + C)), 0)
4494 // ABS:
4495 // (icmp eq A, C) | (icmp eq A, -C)
4496 // -> (icmp eq Abs(A), C)
4497 // (icmp ne A, C) & (icmp ne A, -C)
4498 // -> (icmp ne Abs(A), C)
4499 //
4500 // @param LogicOp the logic op
4501 // @param SETCC0 the first of the SETCC nodes
4502 // @param SETCC1 the second of the SETCC nodes
4503 virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
4504 const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
4505 return AndOrSETCCFoldKind::None;
4506 }
4507
4508 /// Return true if it is profitable to combine an XOR of a logical shift
4509 /// to create a logical shift of NOT. This transformation may not be desirable
4510 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4511 /// BIC on ARM/AArch64). By default, it returns true.
4512 virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
4513 return true;
4514 }
4515
4516 /// Return true if the target has native support for the specified value type
4517 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
4518 /// i16 is legal, but undesirable since i16 instruction encodings are longer
4519 /// and some i16 instructions are slow.
4520 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
4521 // By default, assume all legal types are desirable.
4522 return isTypeLegal(VT);
4523 }
4524
4525 /// Return true if it is profitable for dag combiner to transform a floating
4526 /// point op of the specified opcode to an equivalent op of an integer
4527 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
4528 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
4529 EVT /*VT*/) const {
4530 return false;
4531 }
4532
4533 /// This method queries the target whether it is beneficial for the dag combiner to
4534 /// promote the specified node. If true, it should return the desired
4535 /// promotion type by reference.
4536 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
4537 return false;
4538 }
4539
4540 /// Return true if the target supports swifterror attribute. It optimizes
4541 /// loads and stores to reading and writing a specific register.
4542 virtual bool supportSwiftError() const {
4543 return false;
4544 }
4545
4546 /// Return true if the target supports that a subset of CSRs for the given
4547 /// machine function is handled explicitly via copies.
4548 virtual bool supportSplitCSR(MachineFunction *MF) const {
4549 return false;
4550 }
4551
4552 /// Return true if the target supports kcfi operand bundles.
4553 virtual bool supportKCFIBundles() const { return false; }
4554
4555 /// Return true if the target supports ptrauth operand bundles.
4556 virtual bool supportPtrAuthBundles() const { return false; }
4557
4558 /// Perform necessary initialization to handle a subset of CSRs explicitly
4559 /// via copies. This function is called at the beginning of instruction
4560 /// selection.
4561 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
4562 llvm_unreachable("Not Implemented");
4563 }
4564
4565 /// Insert explicit copies in entry and exit blocks. We copy a subset of
4566 /// CSRs to virtual registers in the entry block, and copy them back to
4567 /// physical registers in the exit blocks. This function is called at the end
4568 /// of instruction selection.
4569 virtual void insertCopiesSplitCSR(
4570 MachineBasicBlock *Entry,
4571 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
4572 llvm_unreachable("Not Implemented");
4573 }
4574
4575 /// Return the newly negated expression if the cost is not expensive, and
4576 /// set \p Cost to indicate whether it is cheaper or neutral to do the
4577 /// negation.
4578 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
4579 bool LegalOps, bool OptForSize,
4580 NegatibleCost &Cost,
4581 unsigned Depth = 0) const;
4582
4583 SDValue getCheaperOrNeutralNegatedExpression(
4584 SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
4585 const NegatibleCost CostThreshold = NegatibleCost::Neutral,
4586 unsigned Depth = 0) const {
4587 NegatibleCost Cost = NegatibleCost::Expensive;
4588 SDValue Neg =
4589 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4590 if (!Neg)
4591 return SDValue();
4592
4593 if (Cost <= CostThreshold)
4594 return Neg;
4595
4596 // Remove the newly created node to avoid side effects on the DAG.
4597 if (Neg->use_empty())
4598 DAG.RemoveDeadNode(Neg.getNode());
4599 return SDValue();
4600 }
4601
4602 /// This is the helper function to return the newly negated expression only
4603 /// when the cost is cheaper.
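///
/// For illustration, a combine might use it to fold (fsub X, Y) into
/// (fadd X, (fneg Y)) only when negating Y is strictly cheaper (X, Y, DL, VT,
/// LegalOperations and ForCodeSize are assumed to be in scope):
/// \code
///   if (SDValue NegY = getCheaperNegatedExpression(Y, DAG, LegalOperations,
///                                                  ForCodeSize))
///     return DAG.getNode(ISD::FADD, DL, VT, X, NegY);
/// \endcode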
4604 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4605 bool LegalOps, bool OptForSize,
4606 unsigned Depth = 0) const {
4607 return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
4608 NegatibleCost::Cheaper, Depth);
4609 }
4610
4611 /// This is the helper function to return the newly negated expression if
4612 /// the cost is not expensive.
4613 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
4614 bool OptForSize, unsigned Depth = 0) const {
4615 NegatibleCost Cost = NegatibleCost::Expensive;
4616 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4617 }
4618
4619 //===--------------------------------------------------------------------===//
4620 // Lowering methods - These methods must be implemented by targets so that
4621 // the SelectionDAGBuilder code knows how to lower these.
4622 //
4623
4624 /// Target-specific splitting of values into parts that fit a register
4625 /// storing a legal type
4626 virtual bool splitValueIntoRegisterParts(
4627 SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4628 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4629 return false;
4630 }
4631
4632 /// Allows the target to handle physreg-carried dependency
4633 /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
4634 /// to add the edge to the dependency graph.
4635 /// Def - input: Selection DAG node defining physical register
4636 /// User - input: Selection DAG node using physical register
4637 /// Op - input: Number of User operand
4638 /// PhysReg - inout: set to the physical register if the edge is
4639 /// necessary, unchanged otherwise
4640 /// Cost - inout: physical register copy cost.
4641 /// Returns 'true' if the edge is necessary, 'false' otherwise
4642 virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
4643 const TargetRegisterInfo *TRI,
4644 const TargetInstrInfo *TII,
4645 MCRegister &PhysReg, int &Cost) const {
4646 return false;
4647 }
4648
4649 /// Target-specific combining of register parts into its original value
4650 virtual SDValue
4651 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
4652 const SDValue *Parts, unsigned NumParts,
4653 MVT PartVT, EVT ValueVT,
4654 std::optional<CallingConv::ID> CC) const {
4655 return SDValue();
4656 }
4657
4658 /// This hook must be implemented to lower the incoming (formal) arguments,
4659 /// described by the Ins array, into the specified DAG. The implementation
4660 /// should fill in the InVals array with legal-type argument values, and
4661 /// return the resulting token chain value.
4662 virtual SDValue LowerFormalArguments(
4663 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
4664 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
4665 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
4666 llvm_unreachable("Not Implemented");
4667 }
4668
4669 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
4670 ArgListTy &Args) const {}
4671
4672 /// This structure contains the information necessary for lowering
4673 /// pointer-authenticating indirect calls. It is equivalent to the "ptrauth"
4674 /// operand bundle found on the call instruction, if any.
4675 struct PtrAuthInfo {
4676 uint64_t Key;
4677 SDValue Discriminator;
4678 };
4679
4680 /// This structure contains all information that is necessary for lowering
4681 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
4682 /// needs to lower a call, and targets will see this struct in their LowerCall
4683 /// implementation.
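///
/// A minimal usage sketch from a hypothetical custom lowering, using the
/// fluent setters below (Chain, dl, RetTy, Callee and Args are assumed to be
/// in scope):
/// \code
///   TargetLowering::CallLoweringInfo CLI(DAG);
///   CLI.setDebugLoc(dl)
///       .setChain(Chain)
///       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
///   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
///   // CallResult.first is the return value, CallResult.second the chain.
/// \endcode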
4684 struct CallLoweringInfo {
4685 SDValue Chain;
4686 Type *RetTy = nullptr;
4687 bool RetSExt : 1;
4688 bool RetZExt : 1;
4689 bool IsVarArg : 1;
4690 bool IsInReg : 1;
4691 bool DoesNotReturn : 1;
4692 bool IsReturnValueUsed : 1;
4693 bool IsConvergent : 1;
4694 bool IsPatchPoint : 1;
4695 bool IsPreallocated : 1;
4696 bool NoMerge : 1;
4697
4698 // IsTailCall should be modified by implementations of
4699 // TargetLowering::LowerCall that perform tail call conversions.
4700 bool IsTailCall = false;
4701
4702 // Is Call lowering done post SelectionDAG type legalization.
4703 bool IsPostTypeLegalization = false;
4704
4705 unsigned NumFixedArgs = -1;
4706 CallingConv::ID CallConv = CallingConv::C;
4707 SDValue Callee;
4708 ArgListTy Args;
4709 SelectionDAG &DAG;
4710 SDLoc DL;
4711 const CallBase *CB = nullptr;
4712 SmallVector<ISD::OutputArg, 32> Outs;
4713 SmallVector<SDValue, 32> OutVals;
4714 SmallVector<ISD::InputArg, 32> Ins;
4715 SmallVector<SDValue, 4> InVals;
4716 const ConstantInt *CFIType = nullptr;
4717 SDValue ConvergenceControlToken;
4718
4719 std::optional<PtrAuthInfo> PAI;
4720
4721 CallLoweringInfo(SelectionDAG &DAG)
4722 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
4723 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
4724 IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
4725 DAG(DAG) {}
4726
4727 CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
4728 DL = dl;
4729 return *this;
4730 }
4731
4732 CallLoweringInfo &setChain(SDValue InChain) {
4733 Chain = InChain;
4734 return *this;
4735 }
4736
4737 // setCallee with target/module-specific attributes
4738 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
4739 SDValue Target, ArgListTy &&ArgsList) {
4740 RetTy = ResultType;
4741 Callee = Target;
4742 CallConv = CC;
4743 NumFixedArgs = ArgsList.size();
4744 Args = std::move(ArgsList);
4745
4746 DAG.getTargetLoweringInfo().markLibCallAttributes(
4747 &(DAG.getMachineFunction()), CC, Args);
4748 return *this;
4749 }
4750
4751 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
4752 SDValue Target, ArgListTy &&ArgsList,
4753 AttributeSet ResultAttrs = {}) {
4754 RetTy = ResultType;
4755 IsInReg = ResultAttrs.hasAttribute(Attribute::InReg);
4756 RetSExt = ResultAttrs.hasAttribute(Attribute::SExt);
4757 RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt);
4758 NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge);
4759
4760 Callee = Target;
4761 CallConv = CC;
4762 NumFixedArgs = ArgsList.size();
4763 Args = std::move(ArgsList);
4764 return *this;
4765 }
4766
4767 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
4768 SDValue Target, ArgListTy &&ArgsList,
4769 const CallBase &Call) {
4770 RetTy = ResultType;
4771
4772 IsInReg = Call.hasRetAttr(Attribute::InReg);
4773 DoesNotReturn =
4774 Call.doesNotReturn() ||
4775 (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
4776 IsVarArg = FTy->isVarArg();
4777 IsReturnValueUsed = !Call.use_empty();
4778 RetSExt = Call.hasRetAttr(Attribute::SExt);
4779 RetZExt = Call.hasRetAttr(Attribute::ZExt);
4780 NoMerge = Call.hasFnAttr(Attribute::NoMerge);
4781
4782 Callee = Target;
4783
4784 CallConv = Call.getCallingConv();
4785 NumFixedArgs = FTy->getNumParams();
4786 Args = std::move(ArgsList);
4787
4788 CB = &Call;
4789
4790 return *this;
4791 }
4792
4793 CallLoweringInfo &setInRegister(bool Value = true) {
4794 IsInReg = Value;
4795 return *this;
4796 }
4797
4798 CallLoweringInfo &setNoReturn(bool Value = true) {
4799 DoesNotReturn = Value;
4800 return *this;
4801 }
4802
4803 CallLoweringInfo &setVarArg(bool Value = true) {
4804 IsVarArg = Value;
4805 return *this;
4806 }
4807
4808 CallLoweringInfo &setTailCall(bool Value = true) {
4809 IsTailCall = Value;
4810 return *this;
4811 }
4812
4813 CallLoweringInfo &setDiscardResult(bool Value = true) {
4814 IsReturnValueUsed = !Value;
4815 return *this;
4816 }
4817
4818 CallLoweringInfo &setConvergent(bool Value = true) {
4819 IsConvergent = Value;
4820 return *this;
4821 }
4822
4823 CallLoweringInfo &setSExtResult(bool Value = true) {
4824 RetSExt = Value;
4825 return *this;
4826 }
4827
4828 CallLoweringInfo &setZExtResult(bool Value = true) {
4829 RetZExt = Value;
4830 return *this;
4831 }
4832
4833 CallLoweringInfo &setIsPatchPoint(bool Value = true) {
4834 IsPatchPoint = Value;
4835 return *this;
4836 }
4837
4838 CallLoweringInfo &setIsPreallocated(bool Value = true) {
4839 IsPreallocated = Value;
4840 return *this;
4841 }
4842
4843 CallLoweringInfo &setPtrAuth(PtrAuthInfo Value) {
4844 PAI = Value;
4845 return *this;
4846 }
4847
4848 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
4849 IsPostTypeLegalization = Value;
4850 return *this;
4851 }
4852
4853 CallLoweringInfo &setCFIType(const ConstantInt *Type) {
4854 CFIType = Type;
4855 return *this;
4856 }
4857
4858 CallLoweringInfo &setConvergenceControlToken(SDValue Token) {
4859 ConvergenceControlToken = Token;
4860 return *this;
4861 }
4862
4863 ArgListTy &getArgs() {
4864 return Args;
4865 }
4866 };
4867
4868 /// This structure is used to pass arguments to makeLibCall function.
4869 struct MakeLibCallOptions {
4870 // By passing type list before soften to makeLibCall, the target hook
4871 // shouldExtendTypeInLibCall can get the original type before soften.
4872 ArrayRef<EVT> OpsVTBeforeSoften;
4873 EVT RetVTBeforeSoften;
4874 ArrayRef<Type *> OpsTypeOverrides;
4875
4876 bool IsSigned : 1;
4877 bool DoesNotReturn : 1;
4878 bool IsReturnValueUsed : 1;
4879 bool IsPostTypeLegalization : 1;
4880 bool IsSoften : 1;
4881
4882 MakeLibCallOptions()
4883 : IsSigned(false), DoesNotReturn(false), IsReturnValueUsed(true),
4884 IsPostTypeLegalization(false), IsSoften(false) {}
4885
4886 MakeLibCallOptions &setIsSigned(bool Value = true) {
4887 IsSigned = Value;
4888 return *this;
4889 }
4890
4891 MakeLibCallOptions &setNoReturn(bool Value = true) {
4892 DoesNotReturn = Value;
4893 return *this;
4894 }
4895
4896 MakeLibCallOptions &setDiscardResult(bool Value = true) {
4897 IsReturnValueUsed = !Value;
4898 return *this;
4899 }
4900
4901 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
4902 IsPostTypeLegalization = Value;
4903 return *this;
4904 }
4905
4906 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
4907 bool Value = true) {
4908 OpsVTBeforeSoften = OpsVT;
4909 RetVTBeforeSoften = RetVT;
4910 IsSoften = Value;
4911 return *this;
4912 }
4913
4914 /// Override the argument type for an operand. Leave the type as null to use
4915 /// the type from the operand's node.
4916 MakeLibCallOptions &setOpsTypeOverrides(ArrayRef<Type *> OpsTypes) {
4917 OpsTypeOverrides = OpsTypes;
4918 return *this;
4919 }
4920 };
4921
4922 /// This function lowers an abstract call to a function into an actual call.
4923 /// This returns a pair of operands. The first element is the return value
4924 /// for the function (if RetTy is not VoidTy). The second element is the
4925 /// outgoing token chain. It calls LowerCall to do the actual lowering.
4926 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
4927
4928 /// This hook must be implemented to lower calls into the specified
4929 /// DAG. The outgoing arguments to the call are described by the Outs array,
4930 /// and the values to be returned by the call are described by the Ins
4931 /// array. The implementation should fill in the InVals array with legal-type
4932 /// return values from the call, and return the resulting token chain value.
4933 virtual SDValue
4934 LowerCall(CallLoweringInfo &/*CLI*/,
4935 SmallVectorImpl<SDValue> &/*InVals*/) const {
4936 llvm_unreachable("Not Implemented");
4937 }
4938
4939 /// Target-specific cleanup for formal ByVal parameters.
4940 virtual void HandleByVal(CCState *, unsigned &, Align) const {}
4941
4942 /// This hook should be implemented to check whether the return values
4943 /// described by the Outs array can fit into the return registers. If false
4944 /// is returned, an sret-demotion is performed.
4945 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
4946 MachineFunction &/*MF*/, bool /*isVarArg*/,
4947 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
4948 LLVMContext &/*Context*/, const Type *RetTy) const
4949 {
4950 // Return true by default to get preexisting behavior.
4951 return true;
4952 }
4953
4954 /// This hook must be implemented to lower outgoing return values, described
4955 /// by the Outs array, into the specified DAG. The implementation should
4956 /// return the resulting token chain value.
4957 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
4958 bool /*isVarArg*/,
4959 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
4960 const SmallVectorImpl<SDValue> & /*OutVals*/,
4961 const SDLoc & /*dl*/,
4962 SelectionDAG & /*DAG*/) const {
4963 llvm_unreachable("Not Implemented");
4964 }
4965
4966 /// Return true if the result of the specified node is used by a return node
4967 /// only. It also computes and returns the input chain for the tail call.
4968 ///
4969 /// This is used to determine whether it is possible to codegen a libcall as
4970 /// tail call at legalization time.
4971 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
4972 return false;
4973 }
4974
4975 /// Return true if the target may be able to emit the call instruction as a tail
4976 /// call. This is used by optimization passes to determine if it's profitable
4977 /// to duplicate return instructions to enable tailcall optimization.
4978 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
4979 return false;
4980 }
4981
4982 /// Return the register ID of the name passed in. Used by named register
4983 /// global variables extension. There is no target-independent behavior
4984 /// so the default action is to bail.
4985 virtual Register getRegisterByName(const char* RegName, LLT Ty,
4986 const MachineFunction &MF) const {
4987 report_fatal_error("Named registers not implemented for this target");
4988 }
4989
4990 /// Return the type that should be used to zero or sign extend a
4991 /// zeroext/signext integer return value. FIXME: Some C calling conventions
4992 /// require the return type to be promoted, but this is not true all the time,
4993 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
4994 /// conventions. The frontend should handle this and include all of the
4995 /// necessary information.
4996 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
4997 ISD::NodeType /*ExtendKind*/) const {
4998 EVT MinVT = getRegisterType(MVT::i32);
4999 return VT.bitsLT(MinVT) ? MinVT : VT;
5000 }
5001
5002 /// For some targets, an LLVM struct type must be broken down into multiple
5003 /// simple types, but the calling convention specifies that the entire struct
5004 /// must be passed in a block of consecutive registers.
5005 virtual bool
5006 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
5007 bool isVarArg,
5008 const DataLayout &DL) const {
5009 return false;
5010 }
5011
5012 /// For most targets, an LLVM type must be broken down into multiple
5013 /// smaller types. Usually the halves are ordered according to the endianness
5014 /// but for some platforms that would break. So this method will default to
5015 /// matching the endianness but can be overridden.
5016 virtual bool
5017 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
5018 return DL.isLittleEndian();
5019 }
5020
5021 /// Returns a 0 terminated array of registers that can be safely used as
5022 /// scratch registers.
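///
/// A minimal sketch for a hypothetical target (the register names are
/// illustrative):
/// \code
///   static const MCPhysReg ScratchRegs[] = {MyTarget::R12, MyTarget::R13, 0};
///   return ScratchRegs;
/// \endcode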
5023 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
5024 return nullptr;
5025 }
5026
5027 /// Returns a 0 terminated array of rounding control registers that can be
5028 /// attached to a strict FP call.
5029 virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const {
5030 return ArrayRef<MCPhysReg>();
5031 }
5032
5033 /// This callback is used to prepare for a volatile or atomic load.
5034 /// It takes a chain node as input and returns the chain for the load itself.
5035 ///
5036 /// Having a callback like this is necessary for targets like SystemZ,
5037 /// which allows a CPU to reuse the result of a previous load indefinitely,
5038 /// even if a cache-coherent store is performed by another CPU. The default
5039 /// implementation does nothing.
5040 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
5041 SelectionDAG &DAG) const {
5042 return Chain;
5043 }
5044
5045 /// This callback is invoked by the type legalizer to legalize nodes with an
5046 /// illegal operand type but legal result types. It replaces the
5047 /// LowerOperation callback in the type Legalizer. The reason we can not do
5048 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
5049 /// use this callback.
5050 ///
5051 /// TODO: Consider merging with ReplaceNodeResults.
5052 ///
5053 /// The target places new result values for the node in Results (their number
5054 /// and types must exactly match those of the original return values of
5055 /// the node), or leaves Results empty, which indicates that the node is not
5056 /// to be custom lowered after all.
5057 /// The default implementation calls LowerOperation.
5058 virtual void LowerOperationWrapper(SDNode *N,
5059 SmallVectorImpl<SDValue> &Results,
5060 SelectionDAG &DAG) const;
5061
5062 /// This callback is invoked for operations that are unsupported by the
5063 /// target, which are registered to use 'custom' lowering, and whose defined
5064 /// values are all legal. If the target has no operations that require custom
5065 /// lowering, it need not implement this. The default implementation of this
5066 /// aborts.
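///
/// A typical override, sketched for a hypothetical target (the helper
/// functions are illustrative, not part of this interface):
/// \code
///   SDValue MyTargetLowering::LowerOperation(SDValue Op,
///                                            SelectionDAG &DAG) const {
///     switch (Op.getOpcode()) {
///     case ISD::GlobalAddress:
///       return lowerGlobalAddress(Op, DAG);
///     case ISD::VASTART:
///       return lowerVASTART(Op, DAG);
///     default:
///       llvm_unreachable("unexpected operation for custom lowering");
///     }
///   }
/// \endcode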
5067 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
5068
5069 /// This callback is invoked when a node result type is illegal for the
5070 /// target, and the operation was registered to use 'custom' lowering for that
5071 /// result type. The target places new result values for the node in Results
5072 /// (their number and types must exactly match those of the original return
5073 /// values of the node), or leaves Results empty, which indicates that the
5074 /// node is not to be custom lowered after all.
5075 ///
5076 /// If the target has no operations that require custom lowering, it need not
5077 /// implement this. The default implementation aborts.
5078 virtual void ReplaceNodeResults(SDNode * /*N*/,
5079 SmallVectorImpl<SDValue> &/*Results*/,
5080 SelectionDAG &/*DAG*/) const {
5081 llvm_unreachable("ReplaceNodeResults not implemented for this target!");
5082 }
5083
5084 /// This method returns the name of a target specific DAG node.
5085 virtual const char *getTargetNodeName(unsigned Opcode) const;
5086
5087 /// This method returns a target specific FastISel object, or null if the
5088 /// target does not support "fast" ISel.
5089 virtual FastISel *createFastISel(FunctionLoweringInfo &,
5090 const TargetLibraryInfo *) const {
5091 return nullptr;
5092 }
5093
5094 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
5095 SelectionDAG &DAG) const;
5096
5097 //===--------------------------------------------------------------------===//
5098 // Inline Asm Support hooks
5099 //
5100
5101 /// This hook allows the target to expand an inline asm call to be explicit
5102 /// llvm code if it wants to. This is useful for turning simple inline asms
5103 /// into LLVM intrinsics, which gives the compiler more information about the
5104 /// behavior of the code.
5105 virtual bool ExpandInlineAsm(CallInst *) const {
5106 return false;
5107 }
5108
5109 enum ConstraintType {
5110 C_Register, // Constraint represents specific register(s).
5111 C_RegisterClass, // Constraint represents any of register(s) in class.
5112 C_Memory, // Memory constraint.
5113 C_Address, // Address constraint.
5114 C_Immediate, // Requires an immediate.
5115 C_Other, // Something else.
5116 C_Unknown // Unsupported constraint.
5117 };
5118
5119 enum ConstraintWeight {
5120 // Generic weights.
5121 CW_Invalid = -1, // No match.
5122 CW_Okay = 0, // Acceptable.
5123 CW_Good = 1, // Good weight.
5124 CW_Better = 2, // Better weight.
5125 CW_Best = 3, // Best weight.
5126
5127 // Well-known weights.
5128 CW_SpecificReg = CW_Okay, // Specific register operands.
5129 CW_Register = CW_Good, // Register operands.
5130 CW_Memory = CW_Better, // Memory operands.
5131 CW_Constant = CW_Best, // Constant operand.
5132 CW_Default = CW_Okay // Default or don't know type.
5133 };
5134
5135 /// This contains information for each constraint that we are lowering.
5136 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
5137 /// This contains the actual string for the code, like "m". TargetLowering
5138 /// picks the 'best' code from ConstraintInfo::Codes that most closely
5139 /// matches the operand.
5140 std::string ConstraintCode;
5141
5142 /// Information about the constraint code, e.g. Register, RegisterClass,
5143 /// Memory, Other, Unknown.
5144 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
5145
5146 /// If this is the result output operand or a clobber, this is null,
5147 /// otherwise it is the incoming operand to the CallInst. This gets
5148 /// modified as the asm is processed.
5149 Value *CallOperandVal = nullptr;
5150
5151 /// The ValueType for the operand value.
5152 MVT ConstraintVT = MVT::Other;
5153
5154 /// Copy constructor for copying from a ConstraintInfo.
5155 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
5156 : InlineAsm::ConstraintInfo(std::move(Info)) {}
5157
5158 /// Return true if this is an input operand that is a matching constraint
5159 /// like "4".
5160 LLVM_ABI bool isMatchingInputConstraint() const;
5161
5162 /// If this is an input matching constraint, this method returns the output
5163 /// operand it matches.
5164 LLVM_ABI unsigned getMatchedOperand() const;
5165 };
5166
5167 using AsmOperandInfoVector = std::vector<AsmOperandInfo>;
5168
5169 /// Split up the constraint string from the inline assembly value into the
5170 /// specific constraints and their prefixes, and also tie in the associated
5171 /// operand values. If this returns an empty vector, and if the constraint
5172 /// string itself isn't empty, there was an error parsing.
5173 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
5174 const TargetRegisterInfo *TRI,
5175 const CallBase &Call) const;
5176
5177 /// Examine constraint type and operand type and determine a weight value.
5178 /// The operand object must already have been set up with the operand type.
5179 virtual ConstraintWeight getMultipleConstraintMatchWeight(
5180 AsmOperandInfo &info, int maIndex) const;
5181
5182 /// Examine constraint string and operand type and determine a weight value.
5183 /// The operand object must already have been set up with the operand type.
5184 virtual ConstraintWeight getSingleConstraintMatchWeight(
5185 AsmOperandInfo &info, const char *constraint) const;
5186
5187 /// Determines the constraint code and constraint type to use for the specific
5188 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5189 /// If the actual operand being passed in is available, it can be passed in as
5190 /// Op, otherwise an empty SDValue can be passed.
5191 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5192 SDValue Op,
5193 SelectionDAG *DAG = nullptr) const;
5194
5195 /// Given a constraint, return the type of constraint it is for this target.
5196 virtual ConstraintType getConstraintType(StringRef Constraint) const;
5197
5198 using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>;
5199 using ConstraintGroup = SmallVector<ConstraintPair>;
5200 /// Given an OpInfo with list of constraints codes as strings, return a
5201 /// sorted Vector of pairs of constraint codes and their types in priority of
5202 /// what we'd prefer to lower them as. This may contain immediates that
5203 /// cannot be lowered, but it is meant to be a machine agnostic order of
5204 /// preferences.
5205 ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const;
5206
5207 /// Given a physical register constraint (e.g. {edx}), return the register
5208 /// number and the register class for the register.
5209 ///
5210 /// Given a register class constraint, like 'r', if this corresponds directly
5211 /// to an LLVM register class, return a register of 0 and the register class
5212 /// pointer.
5213 ///
5214 /// This should only be used for C_Register constraints. On error, this
5215 /// returns a register number of 0 and a null register class pointer.
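///
/// A minimal sketch for a hypothetical target with a single 32-bit GPR class
/// (the class name is illustrative):
/// \code
///   if (Constraint.size() == 1 && Constraint[0] == 'r')
///     return std::make_pair(0U, &MyTarget::GPR32RegClass);
///   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
/// \endcode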
5216 virtual std::pair<unsigned, const TargetRegisterClass *>
5217 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5218 StringRef Constraint, MVT VT) const;
5219
5220 virtual InlineAsm::ConstraintCode
5221 getInlineAsmMemConstraint(StringRef ConstraintCode) const {
5222 if (ConstraintCode == "m")
5223 return InlineAsm::ConstraintCode::m;
5224 if (ConstraintCode == "o")
5225 return InlineAsm::ConstraintCode::o;
5226 if (ConstraintCode == "X")
5227 return InlineAsm::ConstraintCode::X;
5228 if (ConstraintCode == "p")
5229 return InlineAsm::ConstraintCode::p;
5230 return InlineAsm::ConstraintCode::Unknown;
5231 }
5232
5233 /// Try to replace an X constraint, which matches anything, with another that
5234 /// has more specific requirements based on the type of the corresponding
5235 /// operand. This returns null if there is no replacement to make.
5236 virtual const char *LowerXConstraint(EVT ConstraintVT) const;
5237
5238 /// Lower the specified operand into the Ops vector. If it is invalid, don't
5239 /// add anything to Ops.
5240 virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
5241 std::vector<SDValue> &Ops,
5242 SelectionDAG &DAG) const;
5243
5244 // Lower custom output constraints. If invalid, return SDValue().
5245 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
5246 const SDLoc &DL,
5247 const AsmOperandInfo &OpInfo,
5248 SelectionDAG &DAG) const;
5249
5250 // Targets may override this function to collect operands from the CallInst
5251 // and for example, lower them into the SelectionDAG operands.
5252 virtual void CollectTargetIntrinsicOperands(const CallInst &I,
5253 SmallVectorImpl<SDValue> &Ops,
5254 SelectionDAG &DAG) const;
5255
5256 //===--------------------------------------------------------------------===//
5257 // Div utility functions
5258 //
5259
5260 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5261 bool IsAfterLegalTypes,
5262 SmallVectorImpl<SDNode *> &Created) const;
5263 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5264 bool IsAfterLegalTypes,
5265 SmallVectorImpl<SDNode *> &Created) const;
  // Build sdiv by a power of 2 with conditional move instructions.
5267 SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
5268 SelectionDAG &DAG,
5269 SmallVectorImpl<SDNode *> &Created) const;
5270
5271 /// Targets may override this function to provide custom SDIV lowering for
5272 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5273 /// assumes SDIV is expensive and replaces it with a series of other integer
5274 /// operations.
5275 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5276 SelectionDAG &DAG,
5277 SmallVectorImpl<SDNode *> &Created) const;
5278
5279 /// Targets may override this function to provide custom SREM lowering for
5280 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5281 /// assumes SREM is expensive and replaces it with a series of other integer
5282 /// operations.
5283 virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
5284 SelectionDAG &DAG,
5285 SmallVectorImpl<SDNode *> &Created) const;
5286
5287 /// Indicate whether this target prefers to combine FDIVs with the same
5288 /// divisor. If the transform should never be done, return zero. If the
5289 /// transform should be done, return the minimum number of divisor uses
5290 /// that must exist.
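  ///
  /// As a sketch of the intent (not the exact DAG form): if this returns 2 and
  /// the fast-math constraints are satisfied, DAGCombiner may rewrite
  /// \code
  ///   x = a / d;  y = b / d;            // two uses of the divisor d
  /// \endcode
  /// into a single division plus multiplies:
  /// \code
  ///   t = 1.0 / d;  x = a * t;  y = b * t;
  /// \endcode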
5291 virtual unsigned combineRepeatedFPDivisors() const {
5292 return 0;
5293 }
5294
5295 /// Hooks for building estimates in place of slower divisions and square
5296 /// roots.
5297
  /// Return an estimate of either the square root of the input operand or its
  /// reciprocal square root.
5300 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5301 /// 'Enabled' as set by a potential default override attribute.
5302 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5303 /// refinement iterations required to generate a sufficient (though not
5304 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5305 /// The boolean UseOneConstNR output is used to select a Newton-Raphson
5306 /// algorithm implementation that uses either one or two constants.
5307 /// The boolean Reciprocal is used to select whether the estimate is for the
5308 /// square root of the input operand or the reciprocal of its square root.
5309 /// A target may choose to implement its own refinement within this function.
5310 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5311 /// any further refinement of the estimate.
5312 /// An empty SDValue return means no estimate sequence can be created.
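  ///
  /// Illustrative sketch of an override (MyTargetLowering and MYTGT::FRSQRTE
  /// are hypothetical, not part of this interface):
  /// \code
  ///   SDValue MyTargetLowering::getSqrtEstimate(SDValue Operand,
  ///                                             SelectionDAG &DAG, int Enabled,
  ///                                             int &RefinementSteps,
  ///                                             bool &UseOneConstNR,
  ///                                             bool Reciprocal) const {
  ///     if (!Reciprocal || Operand.getValueType() != MVT::f32)
  ///       return SDValue();                 // only estimate 1/sqrt(x) for f32
  ///     if (RefinementSteps == ReciprocalEstimate::Unspecified)
  ///       RefinementSteps = 2;              // NR steps needed for f32 accuracy
  ///     UseOneConstNR = true;
  ///     return DAG.getNode(MYTGT::FRSQRTE, SDLoc(Operand), MVT::f32, Operand);
  ///   }
  /// \endcode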
5313 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
5314 int Enabled, int &RefinementSteps,
5315 bool &UseOneConstNR, bool Reciprocal) const {
5316 return SDValue();
5317 }
5318
5319 /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
5320 /// required for correctness since InstCombine might have canonicalized a
5321 /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
5322 /// through to the default expansion/soften to libcall, we might introduce a
5323 /// link-time dependency on libm into a file that originally did not have one.
5324 SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
5325
5326 /// Return a reciprocal estimate value for the input operand.
5327 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5328 /// 'Enabled' as set by a potential default override attribute.
5329 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5330 /// refinement iterations required to generate a sufficient (though not
5331 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5332 /// A target may choose to implement its own refinement within this function.
5333 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5334 /// any further refinement of the estimate.
5335 /// An empty SDValue return means no estimate sequence can be created.
5336 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
5337 int Enabled, int &RefinementSteps) const {
5338 return SDValue();
5339 }
5340
5341 /// Return a target-dependent comparison result if the input operand is
5342 /// suitable for use with a square root estimate calculation. For example, the
5343 /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
5344 /// result should be used as the condition operand for a select or branch.
5345 virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
5346 const DenormalMode &Mode) const;
5347
5348 /// Return a target-dependent result if the input operand is not suitable for
5349 /// use with a square root estimate calculation.
5350 virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
5351 SelectionDAG &DAG) const {
    return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType());
5353 }
5354
5355 //===--------------------------------------------------------------------===//
5356 // Legalization utility functions
5357 //
5358
5359 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
5360 /// respectively, each computing an n/2-bit part of the result.
5361 /// \param Result A vector that will be filled with the parts of the result
5362 /// in little-endian order.
5363 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5364 /// if you want to control how low bits are extracted from the LHS.
5365 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
5367 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded, false if it has not.
5369 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
5370 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
5371 SelectionDAG &DAG, MulExpansionKind Kind,
5372 SDValue LL = SDValue(), SDValue LH = SDValue(),
5373 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5374
5375 /// Expand a MUL into two nodes. One that computes the high bits of
5376 /// the result and one that computes the low bits.
5377 /// \param HiLoVT The value type to use for the Lo and Hi nodes.
5378 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5379 /// if you want to control how low bits are extracted from the LHS.
5380 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
  /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
  /// \param RH High bits of the RHS of the MUL. See LL for meaning.
  /// \returns true if the node has been expanded, false if it has not.
5384 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5385 SelectionDAG &DAG, MulExpansionKind Kind,
5386 SDValue LL = SDValue(), SDValue LH = SDValue(),
5387 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5388
  /// Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit
5390 /// urem by constant and other arithmetic ops. The n/2-bit urem by constant
5391 /// will be expanded by DAGCombiner. This is not possible for all constant
5392 /// divisors.
5393 /// \param N Node to expand
5394 /// \param Result A vector that will be filled with the lo and high parts of
5395 /// the results. For *DIVREM, this will be the quotient parts followed
5396 /// by the remainder parts.
5397 /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
5398 /// half of VT.
5399 /// \param LL Low bits of the LHS of the operation. You can use this
5400 /// parameter if you want to control how low bits are extracted from
5401 /// the LHS.
5402 /// \param LH High bits of the LHS of the operation. See LL for meaning.
5403 /// \returns true if the node has been expanded, false if it has not.
5404 bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
5405 EVT HiLoVT, SelectionDAG &DAG,
5406 SDValue LL = SDValue(),
5407 SDValue LH = SDValue()) const;
5408
5409 /// Expand funnel shift.
5410 /// \param N Node to expand
5411 /// \returns The expansion if successful, SDValue() otherwise
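  ///
  /// As a scalar sketch for bit width BW (the real expansion also handles a
  /// zero shift amount and non-power-of-two widths):
  /// \code
  ///   fshl(a, b, c): c &= BW - 1;  result = (a << c) | (b >> (BW - c))
  ///   fshr(a, b, c): c &= BW - 1;  result = (a << (BW - c)) | (b >> c)
  /// \endcode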
5412 SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
5413
5414 /// Expand rotations.
5415 /// \param N Node to expand
  /// \param AllowVectorOps whether to expand a vector rotate; this should only
  ///        be done if legalization is happening outside of LegalizeVectorOps
5418 /// \returns The expansion if successful, SDValue() otherwise
5419 SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const;
5420
5421 /// Expand shift-by-parts.
5422 /// \param N Node to expand
  /// \param Lo lower half of the expanded result
  /// \param Hi upper half of the expanded result
5425 void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi,
5426 SelectionDAG &DAG) const;
5427
5428 /// Expand float(f32) to SINT(i64) conversion
5429 /// \param N Node to expand
5430 /// \param Result output after conversion
5431 /// \returns True, if the expansion was successful, false otherwise
5432 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
5433
5434 /// Expand float to UINT conversion
5435 /// \param N Node to expand
5436 /// \param Result output after conversion
5437 /// \param Chain output chain after conversion
5438 /// \returns True, if the expansion was successful, false otherwise
5439 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
5440 SelectionDAG &DAG) const;
5441
5442 /// Expand UINT(i64) to double(f64) conversion
5443 /// \param N Node to expand
5444 /// \param Result output after conversion
5445 /// \param Chain output chain after conversion
5446 /// \returns True, if the expansion was successful, false otherwise
5447 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
5448 SelectionDAG &DAG) const;
5449
5450 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
5451 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
5452
  /// Expand fminimum/fmaximum into multiple comparisons with selects.
5454 SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
5455
  /// Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
5457 SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const;
5458
5459 /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
5460 /// \param N Node to expand
5461 /// \returns The expansion result
5462 SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
5463
5464 /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is
5465 /// not exact, force the result to be odd.
5466 /// \param ResultVT The type of result.
5467 /// \param Op The value to round.
5468 /// \returns The expansion result
5469 SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL,
5470 SelectionDAG &DAG) const;
5471
5472 /// Expand round(fp) to fp conversion
  /// \param Node Node to expand
5474 /// \returns The expansion result
5475 SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const;
5476
5477 /// Expand check for floating point class.
5478 /// \param ResultVT The type of intrinsic call result.
5479 /// \param Op The tested value.
5480 /// \param Test The test to perform.
5481 /// \param Flags The optimization flags.
5482 /// \returns The expansion result or SDValue() if it fails.
5483 SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
5484 SDNodeFlags Flags, const SDLoc &DL,
5485 SelectionDAG &DAG) const;
5486
  /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
5489 /// \param N Node to expand
5490 /// \returns The expansion result or SDValue() if it fails.
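  ///
  /// For scalars, the expansion follows the classic parallel bit-count; a
  /// 32-bit sketch of the underlying idea:
  /// \code
  ///   x = x - ((x >> 1) & 0x55555555);                 // 2-bit partial sums
  ///   x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // 4-bit partial sums
  ///   x = (x + (x >> 4)) & 0x0F0F0F0F;                 // 8-bit partial sums
  ///   x = (x * 0x01010101) >> 24;                      // accumulate bytes
  /// \endcode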
5491 SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;
5492
5493 /// Expand VP_CTPOP nodes.
5494 /// \returns The expansion result or SDValue() if it fails.
5495 SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const;
5496
  /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
5499 /// \param N Node to expand
5500 /// \returns The expansion result or SDValue() if it fails.
5501 SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
5502
5503 /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
5504 /// \param N Node to expand
5505 /// \returns The expansion result or SDValue() if it fails.
5506 SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;
5507
5508 /// Expand CTTZ via Table Lookup.
5509 /// \param N Node to expand
5510 /// \returns The expansion result or SDValue() if it fails.
5511 SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5512 SDValue Op, unsigned NumBitsPerElt) const;
5513
  /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
5516 /// \param N Node to expand
5517 /// \returns The expansion result or SDValue() if it fails.
5518 SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
5519
5520 /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
5521 /// \param N Node to expand
5522 /// \returns The expansion result or SDValue() if it fails.
5523 SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
5524
5525 /// Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
5526 /// \param N Node to expand
5527 /// \returns The expansion result or SDValue() if it fails.
5528 SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const;
5529
5530 /// Expand VECTOR_FIND_LAST_ACTIVE nodes
5531 /// \param N Node to expand
5532 /// \returns The expansion result or SDValue() if it fails.
5533 SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const;
5534
  /// Expand ABS nodes. Expands vector/scalar ABS nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
  /// \param N Node to expand
  /// \param IsNegative indicates whether to expand negated abs (i.e., -abs(x))
5540 /// \returns The expansion result or SDValue() if it fails.
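  ///
  /// Scalar sketch of the expansion above for a 32-bit value (two's
  /// complement):
  /// \code
  ///   s   = x >> 31;        // arithmetic shift: 0 or all-ones
  ///   abs = (x + s) ^ s;    // conditionally negates x when it is negative
  /// \endcode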
5541 SDValue expandABS(SDNode *N, SelectionDAG &DAG,
5542 bool IsNegative = false) const;
5543
5544 /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
5545 /// \param N Node to expand
5546 /// \returns The expansion result or SDValue() if it fails.
5547 SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;
5548
5549 /// Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
5550 /// \param N Node to expand
5551 /// \returns The expansion result or SDValue() if it fails.
5552 SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const;
5553
5554 /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
5555 /// scalar types. Returns SDValue() if expand fails.
5556 /// \param N Node to expand
5557 /// \returns The expansion result or SDValue() if it fails.
5558 SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;
5559
  /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
  /// types.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
5563 SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;
5564
5565 /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
5566 /// Returns SDValue() if expand fails.
5567 /// \param N Node to expand
5568 /// \returns The expansion result or SDValue() if it fails.
5569 SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5570
  /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
  /// i8/i16/i32/i64 scalar types.
  /// \param N Node to expand
  /// \returns The expansion result or SDValue() if it fails.
5574 SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5575
  /// Turn a load of a vector type into loads of the individual elements.
5577 /// \param LD load to expand
5578 /// \returns BUILD_VECTOR and TokenFactor nodes.
5579 std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
5580 SelectionDAG &DAG) const;
5581
  /// Turn a store of a vector type into stores of the individual elements.
5583 /// \param ST Store with a vector value type
5584 /// \returns TokenFactor of the individual store chains.
5585 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5586
5587 /// Expands an unaligned load to 2 half-size loads for an integer, and
5588 /// possibly more for vectors.
5589 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
5590 SelectionDAG &DAG) const;
5591
5592 /// Expands an unaligned store to 2 half-size stores for integer values, and
5593 /// possibly more for vectors.
5594 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5595
5596 /// Increments memory address \p Addr according to the type of the value
5597 /// \p DataVT that should be stored. If the data is stored in compressed
5598 /// form, the memory address should be incremented according to the number of
  /// the stored elements. This number is equal to the number of '1' bits
  /// in \p Mask.
5601 /// \p DataVT is a vector type. \p Mask is a vector value.
5602 /// \p DataVT and \p Mask have the same number of vector elements.
5603 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
5604 EVT DataVT, SelectionDAG &DAG,
5605 bool IsCompressedMemory) const;
5606
5607 /// Get a pointer to vector element \p Idx located in memory for a vector of
5608 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
5609 /// bounds the returned pointer is unspecified, but will be within the vector
5610 /// bounds.
5611 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5612 SDValue Index) const;
5613
5614 /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
5615 /// in memory for a vector of type \p VecVT starting at a base address of
5616 /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
5617 /// returned pointer is unspecified, but the value returned will be such that
5618 /// the entire subvector would be within the vector bounds.
5619 SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5620 EVT SubVecVT, SDValue Index) const;
5621
5622 /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
5623 /// method accepts integers as its arguments.
5624 SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
5625
5626 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
5627 /// method accepts integers as its arguments.
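  ///
  /// Sketch of the unsigned-add case (the signed and subtract forms are
  /// analogous): the expansion clamps the result when the operation overflows.
  /// \code
  ///   sum = a + b;
  ///   res = (sum < a) ? ~0u : sum;   // uaddsat: saturate to the maximum
  /// \endcode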
5628 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
5629
5630 /// Method for building the DAG expansion of ISD::[US]CMP. This
5631 /// method accepts integers as its arguments
5632 SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const;
5633
5634 /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
5635 /// method accepts integers as its arguments.
5636 SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
5637
5638 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
5639 /// method accepts integers as its arguments.
5640 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
5641
5642 /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
5643 /// method accepts integers as its arguments.
5644 /// Note: This method may fail if the division could not be performed
5645 /// within the type. Clients must retry with a wider type if this happens.
5646 SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
5647 SDValue LHS, SDValue RHS,
5648 unsigned Scale, SelectionDAG &DAG) const;
5649
5650 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
5652 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5653 SelectionDAG &DAG) const;
5654
5655 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
5657 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5658 SelectionDAG &DAG) const;
5659
5660 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
5661 /// expansion was successful and populates the Result and Overflow arguments.
5662 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5663 SelectionDAG &DAG) const;
5664
  /// Calculate the product of LHS and RHS at twice their width. If HiLHS/HiRHS
  /// are non-null they will be included in the multiplication. The expansion
  /// works by splitting the two inputs into four pieces that can be multiplied
  /// and added together without needing MULH or MUL_LOHI.
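  ///
  /// The split is the usual schoolbook decomposition; writing each operand as
  /// high and low halves of width h:
  /// \code
  ///   (LH:LL) * (RH:RL) = LL*RL
  ///                     + ((LL*RH + LH*RL) << h)
  ///                     + ((LH*RH) << 2*h)
  /// \endcode
  /// where each partial product is an h x h -> 2h multiply and the carries
  /// between partial sums propagate into the high half.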
5669 void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5670 SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS,
5671 SDValue HiLHS = SDValue(),
5672 SDValue HiRHS = SDValue()) const;
5673
5674 /// Calculate full product of LHS and RHS either via a libcall or through
5675 /// brute force expansion of the multiplication. The expansion works by
5676 /// splitting the 2 inputs into 4 pieces that we can multiply and add together
5677 /// without needing MULH or MUL_LOHI.
5678 void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5679 const SDValue LHS, const SDValue RHS, SDValue &Lo,
5680 SDValue &Hi) const;
5681
5682 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
5683 /// only the first Count elements of the vector are used.
5684 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
5685
5686 /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
5687 SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
5688
5689 /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
5690 /// Returns true if the expansion was successful.
5691 bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
5692
5693 /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
5694 /// method accepts vectors as its arguments.
5695 SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
5696
  /// Expand VECTOR_COMPRESS into a sequence that extracts each element, stores
  /// it temporarily, advances the store position, and finally re-loads the
  /// result vector.
5699 SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const;
5700
5701 /// Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations,
5702 /// consisting of zext/sext, extract_subvector, mul and add operations.
5703 SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const;
5704
5705 /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
5706 /// on the current target. A VP_SETCC will additionally be given a Mask
5707 /// and/or EVL not equal to SDValue().
5708 ///
5709 /// If the SETCC has been legalized using AND / OR, then the legalized node
5710 /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
5711 /// will be set to false. This will also hold if the VP_SETCC has been
5712 /// legalized using VP_AND / VP_OR.
5713 ///
5714 /// If the SETCC / VP_SETCC has been legalized by using
5715 /// getSetCCSwappedOperands(), then the values of LHS and RHS will be
5716 /// swapped, CC will be set to the new condition, and NeedInvert will be set
5717 /// to false.
5718 ///
5719 /// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
5720 /// then LHS and RHS will be unchanged, CC will set to the inverted condcode,
5721 /// and NeedInvert will be set to true. The caller must invert the result of
5722 /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to
5723 /// swap the effect of a true/false result.
5724 ///
5725 /// \returns true if the SETCC / VP_SETCC has been legalized, false if it
5726 /// hasn't.
5727 bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
5728 SDValue &RHS, SDValue &CC, SDValue Mask,
5729 SDValue EVL, bool &NeedInvert, const SDLoc &dl,
5730 SDValue &Chain, bool IsSignaling = false) const;
5731
5732 //===--------------------------------------------------------------------===//
5733 // Instruction Emitting Hooks
5734 //
5735
5736 /// This method should be implemented by targets that mark instructions with
5737 /// the 'usesCustomInserter' flag. These instructions are special in various
5738 /// ways, which require special support to insert. The specified MachineInstr
5739 /// is created but not inserted into any basic blocks, and this method is
5740 /// called to expand it into a sequence of instructions, potentially also
5741 /// creating new basic blocks and control flow.
5742 /// As long as the returned basic block is different (i.e., we created a new
5743 /// one), the custom inserter is free to modify the rest of \p MBB.
5744 virtual MachineBasicBlock *
5745 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
5746
5747 /// This method should be implemented by targets that mark instructions with
5748 /// the 'hasPostISelHook' flag. These instructions must be adjusted after
  /// instruction selection by target hooks, e.g., to fill in optional defs for
  /// ARM 's'-setting instructions.
5751 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
5752 SDNode *Node) const;
5753
5754 /// If this function returns true, SelectionDAGBuilder emits a
5755 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
5756 virtual bool useLoadStackGuardNode(const Module &M) const { return false; }
5757
5758 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
5759 const SDLoc &DL) const {
5760 llvm_unreachable("not implemented for this target");
5761 }
5762
5763 /// Lower TLS global address SDNode for target independent emulated TLS model.
5764 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
5765 SelectionDAG &DAG) const;
5766
  /// Expands a target-specific indirect branch for the case of JumpTable
  /// expansion.
5769 virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
5770 SDValue Addr, int JTI,
5771 SelectionDAG &DAG) const;
5772
5773 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
5774 // If we're comparing for equality to zero and isCtlzFast is true, expose the
5775 // fact that this can be implemented as a ctlz/srl pair, so that the dag
5776 // combiner can fold the new nodes.
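  //
  // For example, for a 32-bit x (with ctlz(0) defined as 32, as ISD::CTLZ is):
  //   (x == 0)  ->  ctlz(x) >> 5
  // since only x == 0 yields a count with bit 5 set.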
5777 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
5778
5779 // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`
5780 virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
5781 return true;
5782 }
5783
  // Expand a vector operation by splitting it into smaller-length operations
  // and joining their results. SDValue() is returned when the expansion did
  // not happen.
5786 SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const;
5787
5788 /// Replace an extraction of a load with a narrowed load.
5789 ///
  /// \param ResultVT type of the extraction result.
  /// \param InVecVT type of the input vector, with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad the vector load to be replaced.
  /// \returns a load of type \p ResultVT on success, SDValue() on failure.
5795 SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL,
5796 EVT InVecVT, SDValue EltNo,
5797 LoadSDNode *OriginalLoad,
5798 SelectionDAG &DAG) const;
5799
5800private:
5801 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5802 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5803 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5804 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5805
5806 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
5807 SDValue N1, ISD::CondCode Cond,
5808 DAGCombinerInfo &DCI,
5809 const SDLoc &DL) const;
5810
5811 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5812 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
5813 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
5814 DAGCombinerInfo &DCI, const SDLoc &DL) const;
5815
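  // Sketch of the fold these helpers build (the Granlund-Montgomery /
  // Hacker's Delight multiplicative-inverse trick), for an odd divisor C:
  //   (x u% C) ==/!= 0  -->  (x * C^-1 mod 2^N) u<=/u> ((2^N - 1) / C)
  // Even divisors are first reduced by rotating out their trailing zero bits.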
5816 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
5817 SDValue CompTargetNode, ISD::CondCode Cond,
5818 DAGCombinerInfo &DCI, const SDLoc &DL,
5819 SmallVectorImpl<SDNode *> &Created) const;
5820 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5821 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5822 const SDLoc &DL) const;
5823
5824 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5825 SDValue CompTargetNode, ISD::CondCode Cond,
5826 DAGCombinerInfo &DCI, const SDLoc &DL,
5827 SmallVectorImpl<SDNode *> &Created) const;
5828 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5829 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5830 const SDLoc &DL) const;
5831};
5832
5833/// Given an LLVM IR type and return type attributes, compute the return value
5834/// EVTs and flags, and optionally also the offsets, if the return value is
5835/// being lowered to memory.
5836LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
5837 AttributeList attr,
5838 SmallVectorImpl<ISD::OutputArg> &Outs,
5839 const TargetLowering &TLI, const DataLayout &DL);
5840
5841} // end namespace llvm
5842
5843#endif // LLVM_CODEGEN_TARGETLOWERING_H
5844
