1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has two
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/CodeGen/DAGCombine.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/LowLevelTypeUtils.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/CodeGenTypes/MachineValueType.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/Type.h"
49#include "llvm/Support/Alignment.h"
50#include "llvm/Support/AtomicOrdering.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/ErrorHandling.h"
53#include <algorithm>
54#include <cassert>
55#include <climits>
56#include <cstdint>
57#include <iterator>
58#include <map>
59#include <string>
60#include <utility>
61#include <vector>
62
63namespace llvm {
64
65class AssumptionCache;
66class CCState;
67class CCValAssign;
68enum class ComplexDeinterleavingOperation;
69enum class ComplexDeinterleavingRotation;
70class Constant;
71class FastISel;
72class FunctionLoweringInfo;
73class GlobalValue;
74class Loop;
75class GISelKnownBits;
76class IntrinsicInst;
77class IRBuilderBase;
78struct KnownBits;
79class LLVMContext;
80class MachineBasicBlock;
81class MachineFunction;
82class MachineInstr;
83class MachineJumpTableInfo;
84class MachineLoop;
85class MachineRegisterInfo;
86class MCContext;
87class MCExpr;
88class Module;
89class ProfileSummaryInfo;
90class TargetLibraryInfo;
91class TargetMachine;
92class TargetRegisterClass;
93class TargetRegisterInfo;
94class TargetTransformInfo;
95class Value;
96
97namespace Sched {
98
99enum Preference {
100 None, // No preference
101 Source, // Follow source order.
102 RegPressure, // Scheduling for lowest register pressure.
103 Hybrid, // Scheduling for both latency and register pressure.
104 ILP, // Scheduling for ILP in low register pressure mode.
105 VLIW, // Scheduling for VLIW targets.
106 Fast, // Fast suboptimal list scheduling
107 Linearize // Linearize DAG, no scheduling
108};
109
110} // end namespace Sched
111
112// MemOp models a memory operation, either memset or memcpy/memmove.
113struct MemOp {
114private:
115 // Shared
116 uint64_t Size;
117 bool DstAlignCanChange; // true if destination alignment can satisfy any
118 // constraint.
119 Align DstAlign; // Specified alignment of the memory operation.
120
121 bool AllowOverlap;
122 // memset only
123 bool IsMemset; // If setthis memory operation is a memset.
124 bool ZeroMemset; // If set clears out memory with zeros.
125 // memcpy only
126 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
127 // constant so it does not need to be loaded.
128 Align SrcAlign; // Inferred alignment of the source or default value if the
129 // memory operation does not need to load the value.
130public:
131 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
132 Align SrcAlign, bool IsVolatile,
133 bool MemcpyStrSrc = false) {
134 MemOp Op;
135 Op.Size = Size;
136 Op.DstAlignCanChange = DstAlignCanChange;
137 Op.DstAlign = DstAlign;
138 Op.AllowOverlap = !IsVolatile;
139 Op.IsMemset = false;
140 Op.ZeroMemset = false;
141 Op.MemcpyStrSrc = MemcpyStrSrc;
142 Op.SrcAlign = SrcAlign;
143 return Op;
144 }
145
146 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
147 bool IsZeroMemset, bool IsVolatile) {
148 MemOp Op;
149 Op.Size = Size;
150 Op.DstAlignCanChange = DstAlignCanChange;
151 Op.DstAlign = DstAlign;
152 Op.AllowOverlap = !IsVolatile;
153 Op.IsMemset = true;
154 Op.ZeroMemset = IsZeroMemset;
155 Op.MemcpyStrSrc = false;
156 return Op;
157 }
158
159 uint64_t size() const { return Size; }
160 Align getDstAlign() const {
161 assert(!DstAlignCanChange);
162 return DstAlign;
163 }
164 bool isFixedDstAlign() const { return !DstAlignCanChange; }
165 bool allowOverlap() const { return AllowOverlap; }
166 bool isMemset() const { return IsMemset; }
167 bool isMemcpy() const { return !IsMemset; }
168 bool isMemcpyWithFixedDstAlign() const {
169 return isMemcpy() && !DstAlignCanChange;
170 }
171 bool isZeroMemset() const { return isMemset() && ZeroMemset; }
172 bool isMemcpyStrSrc() const {
173 assert(isMemcpy() && "Must be a memcpy");
174 return MemcpyStrSrc;
175 }
176 Align getSrcAlign() const {
177 assert(isMemcpy() && "Must be a memcpy");
178 return SrcAlign;
179 }
180 bool isSrcAligned(Align AlignCheck) const {
181 return isMemset() || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: SrcAlign.value());
182 }
183 bool isDstAligned(Align AlignCheck) const {
184 return DstAlignCanChange || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: DstAlign.value());
185 }
186 bool isAligned(Align AlignCheck) const {
187 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
188 }
189};
190
191/// This base class for TargetLowering contains the SelectionDAG-independent
192/// parts that can be used from the rest of CodeGen.
193class TargetLoweringBase {
194public:
195 /// This enum indicates whether operations are valid for a target, and if not,
196 /// what action should be used to make them valid.
197 enum LegalizeAction : uint8_t {
198 Legal, // The target natively supports this operation.
199 Promote, // This operation should be executed in a larger type.
200 Expand, // Try to expand this to other ops, otherwise use a libcall.
201 LibCall, // Don't try to expand this to other ops, always use a libcall.
202 Custom // Use the LowerOperation hook to implement custom lowering.
203 };
204
205 /// This enum indicates whether a types are legal for a target, and if not,
206 /// what action should be used to make them valid.
207 enum LegalizeTypeAction : uint8_t {
208 TypeLegal, // The target natively supports this type.
209 TypePromoteInteger, // Replace this integer with a larger one.
210 TypeExpandInteger, // Split this integer into two of half the size.
211 TypeSoftenFloat, // Convert this float to a same size integer type.
212 TypeExpandFloat, // Split this float into two of half the size.
213 TypeScalarizeVector, // Replace this one-element vector with its element.
214 TypeSplitVector, // Split this vector into two of half the size.
215 TypeWidenVector, // This vector should be widened into a larger vector.
216 TypePromoteFloat, // Replace this float with a larger one.
217 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
218 TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
219 // While it is theoretically possible to
220 // legalize operations on scalable types with a
221 // loop that handles the vscale * #lanes of the
222 // vector, this is non-trivial at SelectionDAG
223 // level and these types are better to be
224 // widened or promoted.
225 };
226
227 /// LegalizeKind holds the legalization kind that needs to happen to EVT
228 /// in order to type-legalize it.
229 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
230
231 /// Enum that describes how the target represents true/false values.
232 enum BooleanContent {
233 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
234 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
235 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
236 };
237
238 /// Enum that describes what type of support for selects the target has.
239 enum SelectSupportKind {
240 ScalarValSelect, // The target supports scalar selects (ex: cmov).
241 ScalarCondVectorVal, // The target supports selects with a scalar condition
242 // and vector values (ex: cmov).
243 VectorMaskSelect // The target supports vector selects with a vector
244 // mask (ex: x86 blends).
245 };
246
247 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
248 /// to, if at all. Exists because different targets have different levels of
249 /// support for these atomic instructions, and also have different options
250 /// w.r.t. what they should expand to.
251 enum class AtomicExpansionKind {
252 None, // Don't expand the instruction.
253 CastToInteger, // Cast the atomic instruction to another type, e.g. from
254 // floating-point to integer type.
255 LLSC, // Expand the instruction into loadlinked/storeconditional; used
256 // by ARM/AArch64.
257 LLOnly, // Expand the (load) instruction into just a load-linked, which has
258 // greater atomic guarantees than a normal load.
259 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
260 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
261 BitTestIntrinsic, // Use a target-specific intrinsic for special bit
262 // operations; used by X86.
263 CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
264 // operations; used by X86.
265 Expand, // Generic expansion in terms of other atomic operations.
266
267 // Rewrite to a non-atomic form for use in a known non-preemptible
268 // environment.
269 NotAtomic
270 };
271
272 /// Enum that specifies when a multiplication should be expanded.
273 enum class MulExpansionKind {
274 Always, // Always expand the instruction.
275 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
276 // or custom.
277 };
278
279 /// Enum that specifies when a float negation is beneficial.
280 enum class NegatibleCost {
281 Cheaper = 0, // Negated expression is cheaper.
282 Neutral = 1, // Negated expression has the same cost.
283 Expensive = 2 // Negated expression is more expensive.
284 };
285
286 /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
287 /// (setcc ...)).
288 enum AndOrSETCCFoldKind : uint8_t {
289 None = 0, // No fold is preferable.
290 AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
291 NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
292 ABS = 4, // Fold with `llvm.abs` op is preferable.
293 };
294
295 class ArgListEntry {
296 public:
297 Value *Val = nullptr;
298 SDValue Node = SDValue();
299 Type *Ty = nullptr;
300 bool IsSExt : 1;
301 bool IsZExt : 1;
302 bool IsInReg : 1;
303 bool IsSRet : 1;
304 bool IsNest : 1;
305 bool IsByVal : 1;
306 bool IsByRef : 1;
307 bool IsInAlloca : 1;
308 bool IsPreallocated : 1;
309 bool IsReturned : 1;
310 bool IsSwiftSelf : 1;
311 bool IsSwiftAsync : 1;
312 bool IsSwiftError : 1;
313 bool IsCFGuardTarget : 1;
314 MaybeAlign Alignment = std::nullopt;
315 Type *IndirectType = nullptr;
316
317 ArgListEntry()
318 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
319 IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
320 IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
321 IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {}
322
323 void setAttributes(const CallBase *Call, unsigned ArgIdx);
324 };
325 using ArgListTy = std::vector<ArgListEntry>;
326
327 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
328 ArgListTy &Args) const {};
329
330 static ISD::NodeType getExtendForContent(BooleanContent Content) {
331 switch (Content) {
332 case UndefinedBooleanContent:
333 // Extend by adding rubbish bits.
334 return ISD::ANY_EXTEND;
335 case ZeroOrOneBooleanContent:
336 // Extend by adding zero bits.
337 return ISD::ZERO_EXTEND;
338 case ZeroOrNegativeOneBooleanContent:
339 // Extend by copying the sign bit.
340 return ISD::SIGN_EXTEND;
341 }
342 llvm_unreachable("Invalid content kind");
343 }
344
345 explicit TargetLoweringBase(const TargetMachine &TM);
346 TargetLoweringBase(const TargetLoweringBase &) = delete;
347 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
348 virtual ~TargetLoweringBase() = default;
349
350 /// Return true if the target support strict float operation
351 bool isStrictFPEnabled() const {
352 return IsStrictFPEnabled;
353 }
354
355protected:
356 /// Initialize all of the actions to default values.
357 void initActions();
358
359public:
360 const TargetMachine &getTargetMachine() const { return TM; }
361
362 virtual bool useSoftFloat() const { return false; }
363
364 /// Return the pointer type for the given address space, defaults to
365 /// the pointer type from the data layout.
366 /// FIXME: The default needs to be removed once all the code is updated.
367 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
368 return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS));
369 }
370
371 /// Return the in-memory pointer type for the given address space, defaults to
372 /// the pointer type from the data layout.
373 /// FIXME: The default needs to be removed once all the code is updated.
374 virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
375 return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS));
376 }
377
378 /// Return the type for frame index, which is determined by
379 /// the alloca address space specified through the data layout.
380 MVT getFrameIndexTy(const DataLayout &DL) const {
381 return getPointerTy(DL, AS: DL.getAllocaAddrSpace());
382 }
383
384 /// Return the type for code pointers, which is determined by the program
385 /// address space specified through the data layout.
386 MVT getProgramPointerTy(const DataLayout &DL) const {
387 return getPointerTy(DL, AS: DL.getProgramAddressSpace());
388 }
389
390 /// Return the type for operands of fence.
391 /// TODO: Let fence operands be of i32 type and remove this.
392 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
393 return getPointerTy(DL);
394 }
395
396 /// Return the type to use for a scalar shift opcode, given the shifted amount
397 /// type. Targets should return a legal type if the input type is legal.
398 /// Targets can return a type that is too small if the input type is illegal.
399 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
400
401 /// Returns the type for the shift amount of a shift opcode. For vectors,
402 /// returns the input type. For scalars, behavior depends on \p LegalTypes. If
403 /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses
404 /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent
405 /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes
406 /// should be set to true for calls during type legalization and after type
407 /// legalization has been completed.
408 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
409 bool LegalTypes = true) const;
410
411 /// Return the preferred type to use for a shift opcode, given the shifted
412 /// amount type is \p ShiftValueTy.
413 LLVM_READONLY
414 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
415 return ShiftValueTy;
416 }
417
418 /// Returns the type to be used for the index operand of:
419 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
420 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
421 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
422 return getPointerTy(DL);
423 }
424
425 /// Returns the type to be used for the EVL/AVL operand of VP nodes:
426 /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type,
427 /// and must be at least as large as i32. The EVL is implicitly zero-extended
428 /// to any larger type.
429 virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; }
430
431 /// This callback is used to inspect load/store instructions and add
432 /// target-specific MachineMemOperand flags to them. The default
433 /// implementation does nothing.
434 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
435 return MachineMemOperand::MONone;
436 }
437
438 /// This callback is used to inspect load/store SDNode.
439 /// The default implementation does nothing.
440 virtual MachineMemOperand::Flags
441 getTargetMMOFlags(const MemSDNode &Node) const {
442 return MachineMemOperand::MONone;
443 }
444
445 MachineMemOperand::Flags
446 getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
447 AssumptionCache *AC = nullptr,
448 const TargetLibraryInfo *LibInfo = nullptr) const;
449 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
450 const DataLayout &DL) const;
451 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
452 const DataLayout &DL) const;
453
454 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
455 return true;
456 }
457
458 /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
459 /// using generic code in SelectionDAGBuilder.
460 virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
461 return true;
462 }
463
464 virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
465 bool IsScalable) const {
466 return true;
467 }
468
469 /// Return true if the @llvm.experimental.cttz.elts intrinsic should be
470 /// expanded using generic code in SelectionDAGBuilder.
471 virtual bool shouldExpandCttzElements(EVT VT) const { return true; }
472
473 // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
474 // vecreduce(op(x, y)) for the reduction opcode RedOpc.
475 virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
476 return true;
477 }
478
479 /// Return true if it is profitable to convert a select of FP constants into
480 /// a constant pool load whose address depends on the select condition. The
481 /// parameter may be used to differentiate a select with FP compare from
482 /// integer compare.
483 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
484 return true;
485 }
486
487 /// Return true if multiple condition registers are available.
488 bool hasMultipleConditionRegisters() const {
489 return HasMultipleConditionRegisters;
490 }
491
492 /// Return true if the target has BitExtract instructions.
493 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
494
495 /// Return the preferred vector type legalization action.
496 virtual TargetLoweringBase::LegalizeTypeAction
497 getPreferredVectorAction(MVT VT) const {
498 // The default action for one element vectors is to scalarize
499 if (VT.getVectorElementCount().isScalar())
500 return TypeScalarizeVector;
501 // The default action for an odd-width vector is to widen.
502 if (!VT.isPow2VectorType())
503 return TypeWidenVector;
504 // The default action for other vectors is to promote
505 return TypePromoteInteger;
506 }
507
508 // Return true if the half type should be promoted using soft promotion rules
509 // where each operation is promoted to f32 individually, then converted to
510 // fp16. The default behavior is to promote chains of operations, keeping
511 // intermediate results in f32 precision and range.
512 virtual bool softPromoteHalfType() const { return false; }
513
514 // Return true if, for soft-promoted half, the half type should be passed
515 // passed to and returned from functions as f32. The default behavior is to
516 // pass as i16. If soft-promoted half is not used, this function is ignored
517 // and values are always passed and returned as f32.
518 virtual bool useFPRegsForHalfType() const { return false; }
519
520 // There are two general methods for expanding a BUILD_VECTOR node:
521 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
522 // them together.
523 // 2. Build the vector on the stack and then load it.
524 // If this function returns true, then method (1) will be used, subject to
525 // the constraint that all of the necessary shuffles are legal (as determined
526 // by isShuffleMaskLegal). If this function returns false, then method (2) is
527 // always used. The vector type, and the number of defined values, are
528 // provided.
529 virtual bool
530 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
531 unsigned DefinedValues) const {
532 return DefinedValues < 3;
533 }
534
535 /// Return true if integer divide is usually cheaper than a sequence of
536 /// several shifts, adds, and multiplies for this target.
537 /// The definition of "cheaper" may depend on whether we're optimizing
538 /// for speed or for size.
539 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
540
541 /// Return true if the target can handle a standalone remainder operation.
542 virtual bool hasStandaloneRem(EVT VT) const {
543 return true;
544 }
545
546 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
547 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
548 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
549 return false;
550 }
551
552 /// Reciprocal estimate status values used by the functions below.
553 enum ReciprocalEstimate : int {
554 Unspecified = -1,
555 Disabled = 0,
556 Enabled = 1
557 };
558
559 /// Return a ReciprocalEstimate enum value for a square root of the given type
560 /// based on the function's attributes. If the operation is not overridden by
561 /// the function's attributes, "Unspecified" is returned and target defaults
562 /// are expected to be used for instruction selection.
563 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
564
565 /// Return a ReciprocalEstimate enum value for a division of the given type
566 /// based on the function's attributes. If the operation is not overridden by
567 /// the function's attributes, "Unspecified" is returned and target defaults
568 /// are expected to be used for instruction selection.
569 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
570
571 /// Return the refinement step count for a square root of the given type based
572 /// on the function's attributes. If the operation is not overridden by
573 /// the function's attributes, "Unspecified" is returned and target defaults
574 /// are expected to be used for instruction selection.
575 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
576
577 /// Return the refinement step count for a division of the given type based
578 /// on the function's attributes. If the operation is not overridden by
579 /// the function's attributes, "Unspecified" is returned and target defaults
580 /// are expected to be used for instruction selection.
581 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
582
583 /// Returns true if target has indicated at least one type should be bypassed.
584 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
585
586 /// Returns map of slow types for division or remainder with corresponding
587 /// fast types
588 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
589 return BypassSlowDivWidths;
590 }
591
592 /// Return true only if vscale must be a power of two.
593 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
594
595 /// Return true if Flow Control is an expensive operation that should be
596 /// avoided.
597 bool isJumpExpensive() const { return JumpIsExpensive; }
598
599 // Costs parameters used by
600 // SelectionDAGBuilder::shouldKeepJumpConditionsTogether.
601 // shouldKeepJumpConditionsTogether will use these parameter value to
602 // determine if two conditions in the form `br (and/or cond1, cond2)` should
603 // be split into two branches or left as one.
604 //
605 // BaseCost is the cost threshold (in latency). If the estimated latency of
606 // computing both `cond1` and `cond2` is below the cost of just computing
607 // `cond1` + BaseCost, the two conditions will be kept together. Otherwise
608 // they will be split.
609 //
610 // LikelyBias increases BaseCost if branch probability info indicates that it
611 // is likely that both `cond1` and `cond2` will be computed.
612 //
613 // UnlikelyBias decreases BaseCost if branch probability info indicates that
614 // it is likely that both `cond1` and `cond2` will be computed.
615 //
616 // Set any field to -1 to make it ignored (setting BaseCost to -1 results in
617 // `shouldKeepJumpConditionsTogether` always returning false).
618 struct CondMergingParams {
619 int BaseCost;
620 int LikelyBias;
621 int UnlikelyBias;
622 };
623 // Return params for deciding if we should keep two branch conditions merged
624 // or split them into two separate branches.
625 // Arg0: The binary op joining the two conditions (and/or).
626 // Arg1: The first condition (cond1)
627 // Arg2: The second condition (cond2)
628 virtual CondMergingParams
629 getJumpConditionMergingParams(Instruction::BinaryOps, const Value *,
630 const Value *) const {
631 // -1 will always result in splitting.
632 return {.BaseCost: -1, .LikelyBias: -1, .UnlikelyBias: -1};
633 }
634
635 /// Return true if selects are only cheaper than branches if the branch is
636 /// unlikely to be predicted right.
637 bool isPredictableSelectExpensive() const {
638 return PredictableSelectIsExpensive;
639 }
640
641 virtual bool fallBackToDAGISel(const Instruction &Inst) const {
642 return false;
643 }
644
645 /// Return true if the following transform is beneficial:
646 /// fold (conv (load x)) -> (load (conv*)x)
647 /// On architectures that don't natively support some vector loads
648 /// efficiently, casting the load to a smaller vector of larger types and
649 /// loading is more efficient, however, this can be undone by optimizations in
650 /// dag combiner.
651 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
652 const SelectionDAG &DAG,
653 const MachineMemOperand &MMO) const;
654
655 /// Return true if the following transform is beneficial:
656 /// (store (y (conv x)), y*)) -> (store x, (x*))
657 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
658 const SelectionDAG &DAG,
659 const MachineMemOperand &MMO) const {
660 // Default to the same logic as loads.
661 return isLoadBitCastBeneficial(LoadVT: StoreVT, BitcastVT, DAG, MMO);
662 }
663
664 /// Return true if it is expected to be cheaper to do a store of vector
665 /// constant with the given size and type for the address space than to
666 /// store the individual scalar element constants.
667 virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
668 unsigned NumElem,
669 unsigned AddrSpace) const {
670 return IsZero;
671 }
672
673 /// Allow store merging for the specified type after legalization in addition
674 /// to before legalization. This may transform stores that do not exist
675 /// earlier (for example, stores created from intrinsics).
676 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
677 return true;
678 }
679
680 /// Returns if it's reasonable to merge stores to MemVT size.
681 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
682 const MachineFunction &MF) const {
683 return true;
684 }
685
686 /// Return true if it is cheap to speculate a call to intrinsic cttz.
687 virtual bool isCheapToSpeculateCttz(Type *Ty) const {
688 return false;
689 }
690
691 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
692 virtual bool isCheapToSpeculateCtlz(Type *Ty) const {
693 return false;
694 }
695
696 /// Return true if ctlz instruction is fast.
697 virtual bool isCtlzFast() const {
698 return false;
699 }
700
701 /// Return true if ctpop instruction is fast.
702 virtual bool isCtpopFast(EVT VT) const {
703 return isOperationLegal(Op: ISD::CTPOP, VT);
704 }
705
706 /// Return the maximum number of "x & (x - 1)" operations that can be done
707 /// instead of deferring to a custom CTPOP.
708 virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const {
709 return 1;
710 }
711
712 /// Return true if instruction generated for equality comparison is folded
713 /// with instruction generated for signed comparison.
714 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
715
716 /// Return true if the heuristic to prefer icmp eq zero should be used in code
717 /// gen prepare.
718 virtual bool preferZeroCompareBranch() const { return false; }
719
720 /// Return true if it is cheaper to split the store of a merged int val
721 /// from a pair of smaller values into multiple stores.
722 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
723 return false;
724 }
725
726 /// Return if the target supports combining a
727 /// chain like:
728 /// \code
729 /// %andResult = and %val1, #mask
730 /// %icmpResult = icmp %andResult, 0
731 /// \endcode
732 /// into a single machine instruction of a form like:
733 /// \code
734 /// cc = test %register, #mask
735 /// \endcode
736 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
737 return false;
738 }
739
740 /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
741 virtual bool
742 areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
743 const MemSDNode &NodeY) const {
744 return true;
745 }
746
747 /// Use bitwise logic to make pairs of compares more efficient. For example:
748 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
749 /// This should be true when it takes more than one instruction to lower
750 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
751 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
752 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
753 return false;
754 }
755
756 /// Return the preferred operand type if the target has a quick way to compare
757 /// integer values of the given size. Assume that any legal integer type can
758 /// be compared efficiently. Targets may override this to allow illegal wide
759 /// types to return a vector type if there is support to compare that type.
760 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
761 MVT VT = MVT::getIntegerVT(BitWidth: NumBits);
762 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
763 }
764
765 /// Return true if the target should transform:
766 /// (X & Y) == Y ---> (~X & Y) == 0
767 /// (X & Y) != Y ---> (~X & Y) != 0
768 ///
769 /// This may be profitable if the target has a bitwise and-not operation that
770 /// sets comparison flags. A target may want to limit the transformation based
771 /// on the type of Y or if Y is a constant.
772 ///
773 /// Note that the transform will not occur if Y is known to be a power-of-2
774 /// because a mask and compare of a single bit can be handled by inverting the
775 /// predicate, for example:
776 /// (X & 8) == 8 ---> (X & 8) != 0
777 virtual bool hasAndNotCompare(SDValue Y) const {
778 return false;
779 }
780
781 /// Return true if the target has a bitwise and-not operation:
782 /// X = ~A & B
783 /// This can be used to simplify select or other instructions.
784 virtual bool hasAndNot(SDValue X) const {
785 // If the target has the more complex version of this operation, assume that
786 // it has this operation too.
787 return hasAndNotCompare(Y: X);
788 }
789
790 /// Return true if the target has a bit-test instruction:
791 /// (X & (1 << Y)) ==/!= 0
792 /// This knowledge can be used to prevent breaking the pattern,
793 /// or creating it if it could be recognized.
794 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
795
796 /// There are two ways to clear extreme bits (either low or high):
797 /// Mask: x & (-1 << y) (the instcombine canonical form)
798 /// Shifts: x >> y << y
799 /// Return true if the variant with 2 variable shifts is preferred.
800 /// Return false if there is no preference.
801 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
802 // By default, let's assume that no one prefers shifts.
803 return false;
804 }
805
806 /// Return true if it is profitable to fold a pair of shifts into a mask.
807 /// This is usually true on most targets. But some targets, like Thumb1,
808 /// have immediate shift instructions, but no immediate "and" instruction;
809 /// this makes the fold unprofitable.
810 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
811 CombineLevel Level) const {
812 return true;
813 }
814
815 /// Should we tranform the IR-optimal check for whether given truncation
816 /// down into KeptBits would be truncating or not:
817 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
818 /// Into it's more traditional form:
819 /// ((%x << C) a>> C) dstcond %x
820 /// Return true if we should transform.
821 /// Return false if there is no preference.
822 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
823 unsigned KeptBits) const {
824 // By default, let's assume that no one prefers shifts.
825 return false;
826 }
827
828 /// Given the pattern
829 /// (X & (C l>>/<< Y)) ==/!= 0
830 /// return true if it should be transformed into:
831 /// ((X <</l>> Y) & C) ==/!= 0
832 /// WARNING: if 'X' is a constant, the fold may deadlock!
833 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
834 /// here because it can end up being not linked in.
835 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
836 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
837 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
838 SelectionDAG &DAG) const {
839 if (hasBitTest(X, Y)) {
840 // One interesting pattern that we'd want to form is 'bit test':
841 // ((1 << Y) & C) ==/!= 0
842 // But we also need to be careful not to try to reverse that fold.
843
844 // Is this '1 << Y' ?
845 if (OldShiftOpcode == ISD::SHL && CC->isOne())
846 return false; // Keep the 'bit test' pattern.
847
848 // Will it be '1 << Y' after the transform ?
849 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
850 return true; // Do form the 'bit test' pattern.
851 }
852
853 // If 'X' is a constant, and we transform, then we will immediately
854 // try to undo the fold, thus causing endless combine loop.
855 // So by default, let's assume everyone prefers the fold
856 // iff 'X' is not a constant.
857 return !XC;
858 }
859
860 // Return true if its desirable to perform the following transform:
861 // (fmul C, (uitofp Pow2))
862 // -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
863 // (fdiv C, (uitofp Pow2))
864 // -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
865 //
866 // This is only queried after we have verified the transform will be bitwise
867 // equals.
868 //
869 // SDNode *N : The FDiv/FMul node we want to transform.
870 // SDValue FPConst: The Float constant operand in `N`.
871 // SDValue IntPow2: The Integer power of 2 operand in `N`.
872 virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
873 SDValue IntPow2) const {
874 // Default to avoiding fdiv which is often very expensive.
875 return N->getOpcode() == ISD::FDIV;
876 }
877
878 // Given:
879 // (icmp eq/ne (and X, C0), (shift X, C1))
880 // or
881 // (icmp eq/ne X, (rotate X, CPow2))
882
883 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
884 // remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
885 // Do we prefer the shift to be shift-right, shift-left, or rotate.
886 // Note: Its only valid to convert the rotate version to the shift version iff
887 // the shift-amt (`C1`) is a power of 2 (including 0).
888 // If ShiftOpc (current Opcode) is returned, do nothing.
889 virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
890 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
891 const APInt &ShiftOrRotateAmt,
892 const std::optional<APInt> &AndMask) const {
893 return ShiftOpc;
894 }
895
896 /// These two forms are equivalent:
897 /// sub %y, (xor %x, -1)
898 /// add (add %x, 1), %y
899 /// The variant with two add's is IR-canonical.
900 /// Some targets may prefer one to the other.
901 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
902 // By default, let's assume that everyone prefers the form with two add's.
903 return true;
904 }
905
906 // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
907 // may want to avoid this to prevent loss of sub_nsw pattern.
908 virtual bool preferABDSToABSWithNSW(EVT VT) const {
909 return true;
910 }
911
912 // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
913 virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
914
915 // Return true if the target wants to transform:
916 // (TruncVT truncate(sext_in_reg(VT X, ExtVT))
917 // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT))
918 // Some targets might prefer pre-sextinreg to improve truncation/saturation.
919 virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const {
920 return true;
921 }
922
923 /// Return true if the target wants to use the optimization that
924 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
925 /// promotedInst1(...(promotedInstN(ext(load)))).
926 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
927
928 /// Return true if the target can combine store(extractelement VectorTy,
929 /// Idx).
930 /// \p Cost[out] gives the cost of that transformation when this is true.
931 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
932 unsigned &Cost) const {
933 return false;
934 }
935
936 /// Return true if the target shall perform extract vector element and store
937 /// given that the vector is known to be splat of constant.
938 /// \p Index[out] gives the index of the vector element to be extracted when
939 /// this is true.
940 virtual bool shallExtractConstSplatVectorElementToStore(
941 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
942 return false;
943 }
944
945 /// Return true if inserting a scalar into a variable element of an undef
946 /// vector is more efficiently handled by splatting the scalar instead.
947 virtual bool shouldSplatInsEltVarIndex(EVT) const {
948 return false;
949 }
950
951 /// Return true if target always benefits from combining into FMA for a
952 /// given value type. This must typically return false on targets where FMA
953 /// takes more cycles to execute than FADD.
954 virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
955
956 /// Return true if target always benefits from combining into FMA for a
957 /// given value type. This must typically return false on targets where FMA
958 /// takes more cycles to execute than FADD.
959 virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
960
961 /// Return the ValueType of the result of SETCC operations.
962 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
963 EVT VT) const;
964
965 /// Return the ValueType for comparison libcalls. Comparison libcalls include
966 /// floating point comparison calls, and Ordered/Unordered check calls on
967 /// floating point numbers.
968 virtual
969 MVT::SimpleValueType getCmpLibcallReturnType() const;
970
971 /// For targets without i1 registers, this gives the nature of the high-bits
972 /// of boolean values held in types wider than i1.
973 ///
974 /// "Boolean values" are special true/false values produced by nodes like
975 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
976 /// Not to be confused with general values promoted from i1. Some cpus
977 /// distinguish between vectors of boolean and scalars; the isVec parameter
978 /// selects between the two kinds. For example on X86 a scalar boolean should
979 /// be zero extended from i1, while the elements of a vector of booleans
980 /// should be sign extended from i1.
981 ///
982 /// Some cpus also treat floating point types the same way as they treat
983 /// vectors instead of the way they treat scalars.
984 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
985 if (isVec)
986 return BooleanVectorContents;
987 return isFloat ? BooleanFloatContents : BooleanContents;
988 }
989
990 BooleanContent getBooleanContents(EVT Type) const {
991 return getBooleanContents(isVec: Type.isVector(), isFloat: Type.isFloatingPoint());
992 }
993
994 /// Promote the given target boolean to a target boolean of the given type.
995 /// A target boolean is an integer value, not necessarily of type i1, the bits
996 /// of which conform to getBooleanContents.
997 ///
998 /// ValVT is the type of values that produced the boolean.
999 SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool,
1000 EVT ValVT) const {
1001 SDLoc dl(Bool);
1002 EVT BoolVT =
1003 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ValVT);
1004 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: ValVT));
1005 return DAG.getNode(Opcode: ExtendCode, DL: dl, VT: BoolVT, Operand: Bool);
1006 }
1007
1008 /// Return target scheduling preference.
1009 Sched::Preference getSchedulingPreference() const {
1010 return SchedPreferenceInfo;
1011 }
1012
1013 /// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics
1014 /// for different nodes. This function returns the preference (or none) for
1015 /// the given node.
1016 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
1017 return Sched::None;
1018 }
1019
1020 /// Return the register class that should be used for the specified value
1021 /// type.
1022 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
1023 (void)isDivergent;
1024 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
1025 assert(RC && "This value type is not natively supported!");
1026 return RC;
1027 }
1028
1029 /// Allows target to decide about the register class of the
1030 /// specific value that is live outside the defining block.
1031 /// Returns true if the value needs uniform register class.
1032 virtual bool requiresUniformRegister(MachineFunction &MF,
1033 const Value *) const {
1034 return false;
1035 }
1036
1037 /// Return the 'representative' register class for the specified value
1038 /// type.
1039 ///
1040 /// The 'representative' register class is the largest legal super-reg
1041 /// register class for the register class of the value type. For example, on
1042 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
1043 /// register class is GR64 on x86_64.
1044 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
1045 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
1046 return RC;
1047 }
1048
1049 /// Return the cost of the 'representative' register class for the specified
1050 /// value type.
1051 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
1052 return RepRegClassCostForVT[VT.SimpleTy];
1053 }
1054
1055 /// Return the preferred strategy to legalize tihs SHIFT instruction, with
1056 /// \p ExpansionFactor being the recursion depth - how many expansion needed.
1057 enum class ShiftLegalizationStrategy {
1058 ExpandToParts,
1059 ExpandThroughStack,
1060 LowerToLibcall
1061 };
1062 virtual ShiftLegalizationStrategy
1063 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1064 unsigned ExpansionFactor) const {
1065 if (ExpansionFactor == 1)
1066 return ShiftLegalizationStrategy::ExpandToParts;
1067 return ShiftLegalizationStrategy::ExpandThroughStack;
1068 }
1069
1070 /// Return true if the target has native support for the specified value type.
1071 /// This means that it has a register that directly holds it without
1072 /// promotions or expansions.
1073 bool isTypeLegal(EVT VT) const {
1074 assert(!VT.isSimple() ||
1075 (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT));
1076 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
1077 }
1078
1079 class ValueTypeActionImpl {
1080 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
1081 /// that indicates how instruction selection should deal with the type.
1082 LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE];
1083
1084 public:
1085 ValueTypeActionImpl() {
1086 std::fill(first: std::begin(arr&: ValueTypeActions), last: std::end(arr&: ValueTypeActions),
1087 value: TypeLegal);
1088 }
1089
1090 LegalizeTypeAction getTypeAction(MVT VT) const {
1091 return ValueTypeActions[VT.SimpleTy];
1092 }
1093
1094 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
1095 ValueTypeActions[VT.SimpleTy] = Action;
1096 }
1097 };
1098
1099 const ValueTypeActionImpl &getValueTypeActions() const {
1100 return ValueTypeActions;
1101 }
1102
1103 /// Return pair that represents the legalization kind (first) that needs to
1104 /// happen to EVT (second) in order to type-legalize it.
1105 ///
1106 /// First: how we should legalize values of this type, either it is already
1107 /// legal (return 'Legal') or we need to promote it to a larger type (return
1108 /// 'Promote'), or we need to expand it into multiple registers of smaller
1109 /// integer type (return 'Expand'). 'Custom' is not an option.
1110 ///
1111 /// Second: for types supported by the target, this is an identity function.
1112 /// For types that must be promoted to larger types, this returns the larger
1113 /// type to promote to. For integer types that are larger than the largest
1114 /// integer register, this contains one step in the expansion to get to the
1115 /// smaller register. For illegal floating point types, this returns the
1116 /// integer type to transform to.
1117 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
1118
1119 /// Return how we should legalize values of this type, either it is already
1120 /// legal (return 'Legal') or we need to promote it to a larger type (return
1121 /// 'Promote'), or we need to expand it into multiple registers of smaller
1122 /// integer type (return 'Expand'). 'Custom' is not an option.
1123 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
1124 return getTypeConversion(Context, VT).first;
1125 }
1126 LegalizeTypeAction getTypeAction(MVT VT) const {
1127 return ValueTypeActions.getTypeAction(VT);
1128 }
1129
1130 /// For types supported by the target, this is an identity function. For
1131 /// types that must be promoted to larger types, this returns the larger type
1132 /// to promote to. For integer types that are larger than the largest integer
1133 /// register, this contains one step in the expansion to get to the smaller
1134 /// register. For illegal floating point types, this returns the integer type
1135 /// to transform to.
1136 virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
1137 return getTypeConversion(Context, VT).second;
1138 }
1139
1140 /// For types supported by the target, this is an identity function. For
1141 /// types that must be expanded (i.e. integer types that are larger than the
1142 /// largest integer register or illegal floating point types), this returns
1143 /// the largest legal type it will be expanded to.
1144 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
1145 assert(!VT.isVector());
1146 while (true) {
1147 switch (getTypeAction(Context, VT)) {
1148 case TypeLegal:
1149 return VT;
1150 case TypeExpandInteger:
1151 VT = getTypeToTransformTo(Context, VT);
1152 break;
1153 default:
1154 llvm_unreachable("Type is not legal nor is it to be expanded!");
1155 }
1156 }
1157 }
1158
1159 /// Vector types are broken down into some number of legal first class types.
1160 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
1161 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
1162 /// turns into 4 EVT::i32 values with both PPC and X86.
1163 ///
1164 /// This method returns the number of registers needed, and the VT for each
1165 /// register. It also returns the VT and quantity of the intermediate values
1166 /// before they are promoted/expanded.
1167 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
1168 EVT &IntermediateVT,
1169 unsigned &NumIntermediates,
1170 MVT &RegisterVT) const;
1171
1172 /// Certain targets such as MIPS require that some types such as vectors are
1173 /// always broken down into scalars in some contexts. This occurs even if the
1174 /// vector type is legal.
1175 virtual unsigned getVectorTypeBreakdownForCallingConv(
1176 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1177 unsigned &NumIntermediates, MVT &RegisterVT) const {
1178 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
1179 RegisterVT);
1180 }
1181
1182 struct IntrinsicInfo {
1183 unsigned opc = 0; // target opcode
1184 EVT memVT; // memory VT
1185
1186 // value representing memory location
1187 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
1188
1189 // Fallback address space for use if ptrVal is nullptr. std::nullopt means
1190 // unknown address space.
1191 std::optional<unsigned> fallbackAddressSpace;
1192
1193 int offset = 0; // offset off of ptrVal
1194 uint64_t size = 0; // the size of the memory location
1195 // (taken from memVT if zero)
1196 MaybeAlign align = Align(1); // alignment
1197
1198 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
1199 IntrinsicInfo() = default;
1200 };
1201
1202 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1203 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1204 /// true and store the intrinsic information into the IntrinsicInfo that was
1205 /// passed to the function.
1206 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
1207 MachineFunction &,
1208 unsigned /*Intrinsic*/) const {
1209 return false;
1210 }
1211
1212 /// Returns true if the target can instruction select the specified FP
1213 /// immediate natively. If false, the legalizer will materialize the FP
1214 /// immediate as a load from a constant pool.
1215 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
1216 bool ForCodeSize = false) const {
1217 return false;
1218 }
1219
1220 /// Targets can use this to indicate that they only support *some*
1221 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1222 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
1223 /// legal.
1224 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
1225 return true;
1226 }
1227
1228 /// Returns true if the operation can trap for the value type.
1229 ///
1230 /// VT must be a legal type. By default, we optimistically assume most
1231 /// operations don't trap except for integer divide and remainder.
1232 virtual bool canOpTrap(unsigned Op, EVT VT) const;
1233
1234 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1235 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1236 /// constant pool entry.
1237 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
1238 EVT /*VT*/) const {
1239 return false;
1240 }
1241
1242 /// How to legalize this custom operation?
1243 virtual LegalizeAction getCustomOperationAction(SDNode &Op) const {
1244 return Legal;
1245 }
1246
1247 /// Return how this operation should be treated: either it is legal, needs to
1248 /// be promoted to a larger size, needs to be expanded to some other code
1249 /// sequence, or the target has a custom expander for it.
1250 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
1251 if (VT.isExtended()) return Expand;
1252 // If a target-specific SDNode requires legalization, require the target
1253 // to provide custom legalization for it.
1254 if (Op >= std::size(OpActions[0]))
1255 return Custom;
1256 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
1257 }
1258
1259 /// Custom method defined by each target to indicate if an operation which
1260 /// may require a scale is supported natively by the target.
1261 /// If not, the operation is illegal.
1262 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
1263 unsigned Scale) const {
1264 return false;
1265 }
1266
1267 /// Some fixed point operations may be natively supported by the target but
1268 /// only for specific scales. This method allows for checking
1269 /// if the width is supported by the target for a given operation that may
1270 /// depend on scale.
1271 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
1272 unsigned Scale) const {
1273 auto Action = getOperationAction(Op, VT);
1274 if (Action != Legal)
1275 return Action;
1276
1277 // This operation is supported in this type but may only work on specific
1278 // scales.
1279 bool Supported;
1280 switch (Op) {
1281 default:
1282 llvm_unreachable("Unexpected fixed point operation.");
1283 case ISD::SMULFIX:
1284 case ISD::SMULFIXSAT:
1285 case ISD::UMULFIX:
1286 case ISD::UMULFIXSAT:
1287 case ISD::SDIVFIX:
1288 case ISD::SDIVFIXSAT:
1289 case ISD::UDIVFIX:
1290 case ISD::UDIVFIXSAT:
1291 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
1292 break;
1293 }
1294
1295 return Supported ? Action : Expand;
1296 }
1297
1298 // If Op is a strict floating-point operation, return the result
1299 // of getOperationAction for the equivalent non-strict operation.
1300 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
1301 unsigned EqOpc;
1302 switch (Op) {
1303 default: llvm_unreachable("Unexpected FP pseudo-opcode");
1304#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1305 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
1306#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1307 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
1308#include "llvm/IR/ConstrainedOps.def"
1309 }
1310
1311 return getOperationAction(Op: EqOpc, VT);
1312 }
1313
1314 /// Return true if the specified operation is legal on this target or can be
1315 /// made legal with custom lowering. This is used to help guide high-level
1316 /// lowering decisions. LegalOnly is an optional convenience for code paths
1317 /// traversed pre and post legalisation.
1318 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
1319 bool LegalOnly = false) const {
1320 if (LegalOnly)
1321 return isOperationLegal(Op, VT);
1322
1323 return (VT == MVT::Other || isTypeLegal(VT)) &&
1324 (getOperationAction(Op, VT) == Legal ||
1325 getOperationAction(Op, VT) == Custom);
1326 }
1327
1328 /// Return true if the specified operation is legal on this target or can be
1329 /// made legal using promotion. This is used to help guide high-level lowering
1330 /// decisions. LegalOnly is an optional convenience for code paths traversed
1331 /// pre and post legalisation.
1332 bool isOperationLegalOrPromote(unsigned Op, EVT VT,
1333 bool LegalOnly = false) const {
1334 if (LegalOnly)
1335 return isOperationLegal(Op, VT);
1336
1337 return (VT == MVT::Other || isTypeLegal(VT)) &&
1338 (getOperationAction(Op, VT) == Legal ||
1339 getOperationAction(Op, VT) == Promote);
1340 }
1341
1342 /// Return true if the specified operation is legal on this target or can be
1343 /// made legal with custom lowering or using promotion. This is used to help
1344 /// guide high-level lowering decisions. LegalOnly is an optional convenience
1345 /// for code paths traversed pre and post legalisation.
1346 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
1347 bool LegalOnly = false) const {
1348 if (LegalOnly)
1349 return isOperationLegal(Op, VT);
1350
1351 return (VT == MVT::Other || isTypeLegal(VT)) &&
1352 (getOperationAction(Op, VT) == Legal ||
1353 getOperationAction(Op, VT) == Custom ||
1354 getOperationAction(Op, VT) == Promote);
1355 }
1356
1357 /// Return true if the operation uses custom lowering, regardless of whether
1358 /// the type is legal or not.
1359 bool isOperationCustom(unsigned Op, EVT VT) const {
1360 return getOperationAction(Op, VT) == Custom;
1361 }
1362
1363 /// Return true if lowering to a jump table is allowed.
1364 virtual bool areJTsAllowed(const Function *Fn) const {
1365 if (Fn->getFnAttribute(Kind: "no-jump-tables").getValueAsBool())
1366 return false;
1367
1368 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1369 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1370 }
1371
1372 /// Check whether the range [Low,High] fits in a machine word.
1373 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1374 const DataLayout &DL) const {
1375 // FIXME: Using the pointer type doesn't seem ideal.
1376 uint64_t BW = DL.getIndexSizeInBits(AS: 0u);
1377 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1378 return Range <= BW;
1379 }
1380
1381 /// Return true if lowering to a jump table is suitable for a set of case
1382 /// clusters which may contain \p NumCases cases, \p Range range of values.
1383 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1384 uint64_t Range, ProfileSummaryInfo *PSI,
1385 BlockFrequencyInfo *BFI) const;
1386
1387 /// Returns preferred type for switch condition.
1388 virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
1389 EVT ConditionVT) const;
1390
1391 /// Return true if lowering to a bit test is suitable for a set of case
1392 /// clusters which contains \p NumDests unique destinations, \p Low and
1393 /// \p High as its lowest and highest case values, and expects \p NumCmps
1394 /// case value comparisons. Check if the number of destinations, comparison
1395 /// metric, and range are all suitable.
1396 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1397 const APInt &Low, const APInt &High,
1398 const DataLayout &DL) const {
1399 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1400 // range of cases both require only one branch to lower. Just looking at the
1401 // number of clusters and destinations should be enough to decide whether to
1402 // build bit tests.
1403
1404 // To lower a range with bit tests, the range must fit the bitwidth of a
1405 // machine word.
1406 if (!rangeFitsInWord(Low, High, DL))
1407 return false;
1408
1409 // Decide whether it's profitable to lower this range with bit tests. Each
1410 // destination requires a bit test and branch, and there is an overall range
1411 // check branch. For a small number of clusters, separate comparisons might
1412 // be cheaper, and for many destinations, splitting the range might be
1413 // better.
1414 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1415 (NumDests == 3 && NumCmps >= 6);
1416 }
1417
1418 /// Return true if the specified operation is illegal on this target or
1419 /// unlikely to be made legal with custom lowering. This is used to help guide
1420 /// high-level lowering decisions.
1421 bool isOperationExpand(unsigned Op, EVT VT) const {
1422 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1423 }
1424
1425 /// Return true if the specified operation is legal on this target.
1426 bool isOperationLegal(unsigned Op, EVT VT) const {
1427 return (VT == MVT::Other || isTypeLegal(VT)) &&
1428 getOperationAction(Op, VT) == Legal;
1429 }
1430
1431 /// Return how this load with extension should be treated: either it is legal,
1432 /// needs to be promoted to a larger size, needs to be expanded to some other
1433 /// code sequence, or the target has a custom expander for it.
1434 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1435 EVT MemVT) const {
1436 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1437 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1438 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1439 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1440 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1441 unsigned Shift = 4 * ExtType;
1442 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1443 }
1444
1445 /// Return true if the specified load with extension is legal on this target.
1446 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1447 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1448 }
1449
1450 /// Return true if the specified load with extension is legal or custom
1451 /// on this target.
1452 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1453 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1454 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1455 }
1456
1457 /// Same as getLoadExtAction, but for atomic loads.
1458 LegalizeAction getAtomicLoadExtAction(unsigned ExtType, EVT ValVT,
1459 EVT MemVT) const {
1460 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1461 unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy;
1462 unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy;
1463 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
1464 MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!");
1465 unsigned Shift = 4 * ExtType;
1466 LegalizeAction Action =
1467 (LegalizeAction)((AtomicLoadExtActions[ValI][MemI] >> Shift) & 0xf);
1468 assert((Action == Legal || Action == Expand) &&
1469 "Unsupported atomic load extension action.");
1470 return Action;
1471 }
1472
1473 /// Return true if the specified atomic load with extension is legal on
1474 /// this target.
1475 bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1476 return getAtomicLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1477 }
1478
1479 /// Return how this store with truncation should be treated: either it is
1480 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1481 /// other code sequence, or the target has a custom expander for it.
1482 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1483 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1484 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1485 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1486 assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE &&
1487 "Table isn't big enough!");
1488 return TruncStoreActions[ValI][MemI];
1489 }
1490
1491 /// Return true if the specified store with truncation is legal on this
1492 /// target.
1493 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1494 return isTypeLegal(VT: ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1495 }
1496
1497 /// Return true if the specified store with truncation has solution on this
1498 /// target.
1499 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1500 return isTypeLegal(VT: ValVT) &&
1501 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1502 getTruncStoreAction(ValVT, MemVT) == Custom);
1503 }
1504
1505 virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
1506 bool LegalOnly) const {
1507 if (LegalOnly)
1508 return isTruncStoreLegal(ValVT, MemVT);
1509
1510 return isTruncStoreLegalOrCustom(ValVT, MemVT);
1511 }
1512
1513 /// Return how the indexed load should be treated: either it is legal, needs
1514 /// to be promoted to a larger size, needs to be expanded to some other code
1515 /// sequence, or the target has a custom expander for it.
1516 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1517 return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Load);
1518 }
1519
1520 /// Return true if the specified indexed load is legal on this target.
1521 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1522 return VT.isSimple() &&
1523 (getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal ||
1524 getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom);
1525 }
1526
1527 /// Return how the indexed store should be treated: either it is legal, needs
1528 /// to be promoted to a larger size, needs to be expanded to some other code
1529 /// sequence, or the target has a custom expander for it.
1530 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1531 return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Store);
1532 }
1533
1534 /// Return true if the specified indexed load is legal on this target.
1535 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1536 return VT.isSimple() &&
1537 (getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal ||
1538 getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom);
1539 }
1540
1541 /// Return how the indexed load should be treated: either it is legal, needs
1542 /// to be promoted to a larger size, needs to be expanded to some other code
1543 /// sequence, or the target has a custom expander for it.
1544 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1545 return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad);
1546 }
1547
1548 /// Return true if the specified indexed load is legal on this target.
1549 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1550 return VT.isSimple() &&
1551 (getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal ||
1552 getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom);
1553 }
1554
1555 /// Return how the indexed store should be treated: either it is legal, needs
1556 /// to be promoted to a larger size, needs to be expanded to some other code
1557 /// sequence, or the target has a custom expander for it.
1558 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1559 return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore);
1560 }
1561
1562 /// Return true if the specified indexed load is legal on this target.
1563 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1564 return VT.isSimple() &&
1565 (getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal ||
1566 getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom);
1567 }
1568
1569 /// Returns true if the index type for a masked gather/scatter requires
1570 /// extending
1571 virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
1572
1573 // Returns true if Extend can be folded into the index of a masked gathers/scatters
1574 // on this target.
1575 virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
1576 return false;
1577 }
1578
1579 // Return true if the target supports a scatter/gather instruction with
1580 // indices which are scaled by the particular value. Note that all targets
1581 // must by definition support scale of 1.
1582 virtual bool isLegalScaleForGatherScatter(uint64_t Scale,
1583 uint64_t ElemSize) const {
1584 // MGATHER/MSCATTER are only required to support scaling by one or by the
1585 // element size.
1586 if (Scale != ElemSize && Scale != 1)
1587 return false;
1588 return true;
1589 }
1590
1591 /// Return how the condition code should be treated: either it is legal, needs
1592 /// to be expanded to some other code sequence, or the target has a custom
1593 /// expander for it.
1594 LegalizeAction
1595 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1596 assert((unsigned)CC < std::size(CondCodeActions) &&
1597 ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) &&
1598 "Table isn't big enough!");
1599 // See setCondCodeAction for how this is encoded.
1600 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1601 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1602 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1603 assert(Action != Promote && "Can't promote condition code!");
1604 return Action;
1605 }
1606
1607 /// Return true if the specified condition code is legal on this target.
1608 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1609 return getCondCodeAction(CC, VT) == Legal;
1610 }
1611
1612 /// Return true if the specified condition code is legal or custom on this
1613 /// target.
1614 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1615 return getCondCodeAction(CC, VT) == Legal ||
1616 getCondCodeAction(CC, VT) == Custom;
1617 }
1618
1619 /// If the action for this operation is to promote, this method returns the
1620 /// ValueType to promote to.
1621 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1622 assert(getOperationAction(Op, VT) == Promote &&
1623 "This operation isn't promoted!");
1624
1625 // See if this has an explicit type specified.
1626 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1627 MVT::SimpleValueType>::const_iterator PTTI =
1628 PromoteToType.find(x: std::make_pair(x&: Op, y&: VT.SimpleTy));
1629 if (PTTI != PromoteToType.end()) return PTTI->second;
1630
1631 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1632 "Cannot autopromote this type, add it with AddPromotedToType.");
1633
1634 uint64_t VTBits = VT.getScalarSizeInBits();
1635 MVT NVT = VT;
1636 do {
1637 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1638 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1639 "Didn't find type to promote to!");
1640 } while (VTBits >= NVT.getScalarSizeInBits() || !isTypeLegal(VT: NVT) ||
1641 getOperationAction(Op, VT: NVT) == Promote);
1642 return NVT;
1643 }
1644
1645 virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
1646 bool AllowUnknown = false) const {
1647 return getValueType(DL, Ty, AllowUnknown);
1648 }
1649
1650 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1651 /// operations except for the pointer size. If AllowUnknown is true, this
1652 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1653 /// otherwise it will assert.
1654 EVT getValueType(const DataLayout &DL, Type *Ty,
1655 bool AllowUnknown = false) const {
1656 // Lower scalar pointers to native pointer types.
1657 if (auto *PTy = dyn_cast<PointerType>(Val: Ty))
1658 return getPointerTy(DL, AS: PTy->getAddressSpace());
1659
1660 if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) {
1661 Type *EltTy = VTy->getElementType();
1662 // Lower vectors of pointers to native pointer types.
1663 if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) {
1664 EVT PointerTy(getPointerTy(DL, AS: PTy->getAddressSpace()));
1665 EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext());
1666 }
1667 return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false),
1668 EC: VTy->getElementCount());
1669 }
1670
1671 return EVT::getEVT(Ty, HandleUnknown: AllowUnknown);
1672 }
1673
1674 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1675 bool AllowUnknown = false) const {
1676 // Lower scalar pointers to native pointer types.
1677 if (auto *PTy = dyn_cast<PointerType>(Val: Ty))
1678 return getPointerMemTy(DL, AS: PTy->getAddressSpace());
1679
1680 if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) {
1681 Type *EltTy = VTy->getElementType();
1682 if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) {
1683 EVT PointerTy(getPointerMemTy(DL, AS: PTy->getAddressSpace()));
1684 EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext());
1685 }
1686 return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false),
1687 EC: VTy->getElementCount());
1688 }
1689
1690 return getValueType(DL, Ty, AllowUnknown);
1691 }
1692
1693
1694 /// Return the MVT corresponding to this LLVM type. See getValueType.
1695 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1696 bool AllowUnknown = false) const {
1697 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1698 }
1699
1700 /// Return the desired alignment for ByVal or InAlloca aggregate function
1701 /// arguments in the caller parameter area. This is the actual alignment, not
1702 /// its logarithm.
1703 virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1704
1705 /// Return the type of registers that this ValueType will eventually require.
1706 MVT getRegisterType(MVT VT) const {
1707 assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT));
1708 return RegisterTypeForVT[VT.SimpleTy];
1709 }
1710
1711 /// Return the type of registers that this ValueType will eventually require.
1712 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1713 if (VT.isSimple())
1714 return getRegisterType(VT: VT.getSimpleVT());
1715 if (VT.isVector()) {
1716 EVT VT1;
1717 MVT RegisterVT;
1718 unsigned NumIntermediates;
1719 (void)getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1,
1720 NumIntermediates, RegisterVT);
1721 return RegisterVT;
1722 }
1723 if (VT.isInteger()) {
1724 return getRegisterType(Context, VT: getTypeToTransformTo(Context, VT));
1725 }
1726 llvm_unreachable("Unsupported extended type!");
1727 }
1728
1729 /// Return the number of registers that this ValueType will eventually
1730 /// require.
1731 ///
1732 /// This is one for any types promoted to live in larger registers, but may be
1733 /// more than one for types (like i64) that are split into pieces. For types
1734 /// like i140, which are first promoted then expanded, it is the number of
1735 /// registers needed to hold all the bits of the original type. For an i140
1736 /// on a 32 bit machine this means 5 registers.
1737 ///
1738 /// RegisterVT may be passed as a way to override the default settings, for
1739 /// instance with i128 inline assembly operands on SystemZ.
1740 virtual unsigned
1741 getNumRegisters(LLVMContext &Context, EVT VT,
1742 std::optional<MVT> RegisterVT = std::nullopt) const {
1743 if (VT.isSimple()) {
1744 assert((unsigned)VT.getSimpleVT().SimpleTy <
1745 std::size(NumRegistersForVT));
1746 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1747 }
1748 if (VT.isVector()) {
1749 EVT VT1;
1750 MVT VT2;
1751 unsigned NumIntermediates;
1752 return getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1, NumIntermediates, RegisterVT&: VT2);
1753 }
1754 if (VT.isInteger()) {
1755 unsigned BitWidth = VT.getSizeInBits();
1756 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1757 return (BitWidth + RegWidth - 1) / RegWidth;
1758 }
1759 llvm_unreachable("Unsupported extended type!");
1760 }
1761
1762 /// Certain combinations of ABIs, Targets and features require that types
1763 /// are legal for some operations and not for other operations.
1764 /// For MIPS all vector types must be passed through the integer register set.
1765 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1766 CallingConv::ID CC, EVT VT) const {
1767 return getRegisterType(Context, VT);
1768 }
1769
1770 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1771 /// this occurs when a vector type is used, as vector are passed through the
1772 /// integer register set.
1773 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1774 CallingConv::ID CC,
1775 EVT VT) const {
1776 return getNumRegisters(Context, VT);
1777 }
1778
1779 /// Certain targets have context sensitive alignment requirements, where one
1780 /// type has the alignment requirement of another type.
1781 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1782 const DataLayout &DL) const {
1783 return DL.getABITypeAlign(Ty: ArgTy);
1784 }
1785
1786 /// If true, then instruction selection should seek to shrink the FP constant
1787 /// of the specified type to a smaller type in order to save space and / or
1788 /// reduce runtime.
1789 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1790
1791 /// Return true if it is profitable to reduce a load to a smaller type.
1792 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1793 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1794 EVT NewVT) const {
1795 // By default, assume that it is cheaper to extract a subvector from a wide
1796 // vector load rather than creating multiple narrow vector loads.
1797 if (NewVT.isVector() && !Load->hasOneUse())
1798 return false;
1799
1800 return true;
1801 }
1802
1803 /// Return true (the default) if it is profitable to remove a sext_inreg(x)
1804 /// where the sext is redundant, and use x directly.
1805 virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
1806
1807 /// Indicates if any padding is guaranteed to go at the most significant bits
1808 /// when storing the type to memory and the type size isn't equal to the store
1809 /// size.
1810 bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const {
1811 return VT.isScalarInteger() && !VT.isByteSized();
1812 }
1813
1814 /// When splitting a value of the specified type into parts, does the Lo
1815 /// or Hi part come first? This usually follows the endianness, except
1816 /// for ppcf128, where the Hi part always comes first.
1817 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1818 return DL.isBigEndian() || VT == MVT::ppcf128;
1819 }
1820
1821 /// If true, the target has custom DAG combine transformations that it can
1822 /// perform for the specified node.
1823 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1824 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
1825 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1826 }
1827
1828 unsigned getGatherAllAliasesMaxDepth() const {
1829 return GatherAllAliasesMaxDepth;
1830 }
1831
1832 /// Returns the size of the platform's va_list object.
1833 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1834 return getPointerTy(DL).getSizeInBits();
1835 }
1836
1837 /// Get maximum # of store operations permitted for llvm.memset
1838 ///
1839 /// This function returns the maximum number of store operations permitted
1840 /// to replace a call to llvm.memset. The value is set by the target at the
1841 /// performance threshold for such a replacement. If OptSize is true,
1842 /// return the limit for functions that have OptSize attribute.
1843 unsigned getMaxStoresPerMemset(bool OptSize) const {
1844 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1845 }
1846
1847 /// Get maximum # of store operations permitted for llvm.memcpy
1848 ///
1849 /// This function returns the maximum number of store operations permitted
1850 /// to replace a call to llvm.memcpy. The value is set by the target at the
1851 /// performance threshold for such a replacement. If OptSize is true,
1852 /// return the limit for functions that have OptSize attribute.
1853 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1854 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1855 }
1856
1857 /// \brief Get maximum # of store operations to be glued together
1858 ///
1859 /// This function returns the maximum number of store operations permitted
1860 /// to glue together during lowering of llvm.memcpy. The value is set by
1861 // the target at the performance threshold for such a replacement.
1862 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1863 return MaxGluedStoresPerMemcpy;
1864 }
1865
1866 /// Get maximum # of load operations permitted for memcmp
1867 ///
1868 /// This function returns the maximum number of load operations permitted
1869 /// to replace a call to memcmp. The value is set by the target at the
1870 /// performance threshold for such a replacement. If OptSize is true,
1871 /// return the limit for functions that have OptSize attribute.
1872 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1873 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1874 }
1875
1876 /// Get maximum # of store operations permitted for llvm.memmove
1877 ///
1878 /// This function returns the maximum number of store operations permitted
1879 /// to replace a call to llvm.memmove. The value is set by the target at the
1880 /// performance threshold for such a replacement. If OptSize is true,
1881 /// return the limit for functions that have OptSize attribute.
1882 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1883 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1884 }
1885
1886 /// Determine if the target supports unaligned memory accesses.
1887 ///
1888 /// This function returns true if the target allows unaligned memory accesses
1889 /// of the specified type in the given address space. If true, it also returns
1890 /// a relative speed of the unaligned memory access in the last argument by
1891 /// reference. The higher the speed number the faster the operation comparing
1892 /// to a number returned by another such call. This is used, for example, in
1893 /// situations where an array copy/move/set is converted to a sequence of
1894 /// store operations. Its use helps to ensure that such replacements don't
1895 /// generate code that causes an alignment error (trap) on the target machine.
1896 virtual bool allowsMisalignedMemoryAccesses(
1897 EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1898 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1899 unsigned * /*Fast*/ = nullptr) const {
1900 return false;
1901 }
1902
1903 /// LLT handling variant.
1904 virtual bool allowsMisalignedMemoryAccesses(
1905 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1906 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1907 unsigned * /*Fast*/ = nullptr) const {
1908 return false;
1909 }
1910
1911 /// This function returns true if the memory access is aligned or if the
1912 /// target allows this specific unaligned memory access. If the access is
1913 /// allowed, the optional final parameter returns a relative speed of the
1914 /// access (as defined by the target).
1915 bool allowsMemoryAccessForAlignment(
1916 LLVMContext &Context, const DataLayout &DL, EVT VT,
1917 unsigned AddrSpace = 0, Align Alignment = Align(1),
1918 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1919 unsigned *Fast = nullptr) const;
1920
1921 /// Return true if the memory access of this type is aligned or if the target
1922 /// allows this specific unaligned access for the given MachineMemOperand.
1923 /// If the access is allowed, the optional final parameter returns a relative
1924 /// speed of the access (as defined by the target).
1925 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1926 const DataLayout &DL, EVT VT,
1927 const MachineMemOperand &MMO,
1928 unsigned *Fast = nullptr) const;
1929
1930 /// Return true if the target supports a memory access of this type for the
1931 /// given address space and alignment. If the access is allowed, the optional
1932 /// final parameter returns the relative speed of the access (as defined by
1933 /// the target).
1934 virtual bool
1935 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1936 unsigned AddrSpace = 0, Align Alignment = Align(1),
1937 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1938 unsigned *Fast = nullptr) const;
1939
1940 /// Return true if the target supports a memory access of this type for the
1941 /// given MachineMemOperand. If the access is allowed, the optional
1942 /// final parameter returns the relative access speed (as defined by the
1943 /// target).
1944 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1945 const MachineMemOperand &MMO,
1946 unsigned *Fast = nullptr) const;
1947
1948 /// LLT handling variant.
1949 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
1950 const MachineMemOperand &MMO,
1951 unsigned *Fast = nullptr) const;
1952
1953 /// Returns the target specific optimal type for load and store operations as
1954 /// a result of memset, memcpy, and memmove lowering.
1955 /// It returns EVT::Other if the type should be determined using generic
1956 /// target-independent logic.
1957 virtual EVT
1958 getOptimalMemOpType(const MemOp &Op,
1959 const AttributeList & /*FuncAttributes*/) const {
1960 return MVT::Other;
1961 }
1962
1963 /// LLT returning variant.
1964 virtual LLT
1965 getOptimalMemOpLLT(const MemOp &Op,
1966 const AttributeList & /*FuncAttributes*/) const {
1967 return LLT();
1968 }
1969
1970 /// Returns true if it's safe to use load / store of the specified type to
1971 /// expand memcpy / memset inline.
1972 ///
1973 /// This is mostly true for all types except for some special cases. For
1974 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1975 /// fstpl which also does type conversion. Note the specified type doesn't
1976 /// have to be legal as the hook is used before type legalization.
1977 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1978
1979 /// Return lower limit for number of blocks in a jump table.
1980 virtual unsigned getMinimumJumpTableEntries() const;
1981
1982 /// Return lower limit of the density in a jump table.
1983 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1984
1985 /// Return upper limit for number of entries in a jump table.
1986 /// Zero if no limit.
1987 unsigned getMaximumJumpTableSize() const;
1988
1989 virtual bool isJumpTableRelative() const;
1990
1991 /// If a physical register, this specifies the register that
1992 /// llvm.savestack/llvm.restorestack should save and restore.
1993 Register getStackPointerRegisterToSaveRestore() const {
1994 return StackPointerRegisterToSaveRestore;
1995 }
1996
1997 /// If a physical register, this returns the register that receives the
1998 /// exception address on entry to an EH pad.
1999 virtual Register
2000 getExceptionPointerRegister(const Constant *PersonalityFn) const {
2001 return Register();
2002 }
2003
2004 /// If a physical register, this returns the register that receives the
2005 /// exception typeid on entry to a landing pad.
2006 virtual Register
2007 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
2008 return Register();
2009 }
2010
2011 virtual bool needsFixedCatchObjects() const {
2012 report_fatal_error(reason: "Funclet EH is not implemented for this target");
2013 }
2014
2015 /// Return the minimum stack alignment of an argument.
2016 Align getMinStackArgumentAlignment() const {
2017 return MinStackArgumentAlignment;
2018 }
2019
2020 /// Return the minimum function alignment.
2021 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
2022
2023 /// Return the preferred function alignment.
2024 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
2025
2026 /// Return the preferred loop alignment.
2027 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;
2028
2029 /// Return the maximum amount of bytes allowed to be emitted when padding for
2030 /// alignment
2031 virtual unsigned
2032 getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const;
2033
2034 /// Should loops be aligned even when the function is marked OptSize (but not
2035 /// MinSize).
2036 virtual bool alignLoopsWithOptSize() const { return false; }
2037
2038 /// If the target has a standard location for the stack protector guard,
2039 /// returns the address of that location. Otherwise, returns nullptr.
2040 /// DEPRECATED: please override useLoadStackGuardNode and customize
2041 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
2042 virtual Value *getIRStackGuard(IRBuilderBase &IRB) const;
2043
2044 /// Inserts necessary declarations for SSP (stack protection) purpose.
2045 /// Should be used only when getIRStackGuard returns nullptr.
2046 virtual void insertSSPDeclarations(Module &M) const;
2047
2048 /// Return the variable that's previously inserted by insertSSPDeclarations,
2049 /// if any, otherwise return nullptr. Should be used only when
2050 /// getIRStackGuard returns nullptr.
2051 virtual Value *getSDagStackGuard(const Module &M) const;
2052
2053 /// If this function returns true, stack protection checks should XOR the
2054 /// frame pointer (or whichever pointer is used to address locals) into the
2055 /// stack guard value before checking it. getIRStackGuard must return nullptr
2056 /// if this returns true.
2057 virtual bool useStackGuardXorFP() const { return false; }
2058
2059 /// If the target has a standard stack protection check function that
2060 /// performs validation and error handling, returns the function. Otherwise,
2061 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
2062 /// Should be used only when getIRStackGuard returns nullptr.
2063 virtual Function *getSSPStackGuardCheck(const Module &M) const;
2064
2065protected:
2066 Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB,
2067 bool UseTLS) const;
2068
2069public:
2070 /// Returns the target-specific address of the unsafe stack pointer.
2071 virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const;
2072
2073 /// Returns the name of the symbol used to emit stack probes or the empty
2074 /// string if not applicable.
2075 virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; }
2076
2077 virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }
2078
2079 virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
2080 return "";
2081 }
2082
2083 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
2084 /// are happy to sink it into basic blocks. A cast may be free, but not
2085 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
2086 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
2087
2088 /// Return true if the pointer arguments to CI should be aligned by aligning
2089 /// the object whose address is being passed. If so then MinSize is set to the
2090 /// minimum size the object must be to be aligned and PrefAlign is set to the
2091 /// preferred alignment.
2092 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
2093 Align & /*PrefAlign*/) const {
2094 return false;
2095 }
2096
2097 //===--------------------------------------------------------------------===//
2098 /// \name Helpers for TargetTransformInfo implementations
2099 /// @{
2100
2101 /// Get the ISD node that corresponds to the Instruction class opcode.
2102 int InstructionOpcodeToISD(unsigned Opcode) const;
2103
2104 /// @}
2105
2106 //===--------------------------------------------------------------------===//
2107 /// \name Helpers for atomic expansion.
2108 /// @{
2109
2110 /// Returns the maximum atomic operation size (in bits) supported by
2111 /// the backend. Atomic operations greater than this size (as well
2112 /// as ones that are not naturally aligned), will be expanded by
2113 /// AtomicExpandPass into an __atomic_* library call.
2114 unsigned getMaxAtomicSizeInBitsSupported() const {
2115 return MaxAtomicSizeInBitsSupported;
2116 }
2117
2118 /// Returns the size in bits of the maximum div/rem the backend supports.
2119 /// Larger operations will be expanded by ExpandLargeDivRem.
2120 unsigned getMaxDivRemBitWidthSupported() const {
2121 return MaxDivRemBitWidthSupported;
2122 }
2123
2124 /// Returns the size in bits of the maximum larget fp convert the backend
2125 /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
2126 unsigned getMaxLargeFPConvertBitWidthSupported() const {
2127 return MaxLargeFPConvertBitWidthSupported;
2128 }
2129
2130 /// Returns the size of the smallest cmpxchg or ll/sc instruction
2131 /// the backend supports. Any smaller operations are widened in
2132 /// AtomicExpandPass.
2133 ///
2134 /// Note that *unlike* operations above the maximum size, atomic ops
2135 /// are still natively supported below the minimum; they just
2136 /// require a more complex expansion.
2137 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
2138
2139 /// Whether the target supports unaligned atomic operations.
2140 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
2141
2142 /// Whether AtomicExpandPass should automatically insert fences and reduce
2143 /// ordering for this atomic. This should be true for most architectures with
2144 /// weak memory ordering. Defaults to false.
2145 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
2146 return false;
2147 }
2148
2149 /// Whether AtomicExpandPass should automatically insert a trailing fence
2150 /// without reducing the ordering for this atomic. Defaults to false.
2151 virtual bool
2152 shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const {
2153 return false;
2154 }
2155
2156 /// Perform a load-linked operation on Addr, returning a "Value *" with the
2157 /// corresponding pointee type. This may entail some non-trivial operations to
2158 /// truncate or reconstruct types that will be illegal in the backend. See
2159 /// ARMISelLowering for an example implementation.
2160 virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
2161 Value *Addr, AtomicOrdering Ord) const {
2162 llvm_unreachable("Load linked unimplemented on this target");
2163 }
2164
2165 /// Perform a store-conditional operation to Addr. Return the status of the
2166 /// store. This should be 0 if the store succeeded, non-zero otherwise.
2167 virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val,
2168 Value *Addr, AtomicOrdering Ord) const {
2169 llvm_unreachable("Store conditional unimplemented on this target");
2170 }
2171
2172 /// Perform a masked atomicrmw using a target-specific intrinsic. This
2173 /// represents the core LL/SC loop which will be lowered at a late stage by
2174 /// the backend. The target-specific intrinsic returns the loaded value and
2175 /// is not responsible for masking and shifting the result.
2176 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
2177 AtomicRMWInst *AI,
2178 Value *AlignedAddr, Value *Incr,
2179 Value *Mask, Value *ShiftAmt,
2180 AtomicOrdering Ord) const {
2181 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
2182 }
2183
2184 /// Perform a atomicrmw expansion using a target-specific way. This is
2185 /// expected to be called when masked atomicrmw and bit test atomicrmw don't
2186 /// work, and the target supports another way to lower atomicrmw.
2187 virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const {
2188 llvm_unreachable(
2189 "Generic atomicrmw expansion unimplemented on this target");
2190 }
2191
2192 /// Perform a bit test atomicrmw using a target-specific intrinsic. This
2193 /// represents the combined bit test intrinsic which will be lowered at a late
2194 /// stage by the backend.
2195 virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2196 llvm_unreachable(
2197 "Bit test atomicrmw expansion unimplemented on this target");
2198 }
2199
2200 /// Perform a atomicrmw which the result is only used by comparison, using a
2201 /// target-specific intrinsic. This represents the combined atomic and compare
2202 /// intrinsic which will be lowered at a late stage by the backend.
2203 virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
2204 llvm_unreachable(
2205 "Compare arith atomicrmw expansion unimplemented on this target");
2206 }
2207
2208 /// Perform a masked cmpxchg using a target-specific intrinsic. This
2209 /// represents the core LL/SC loop which will be lowered at a late stage by
2210 /// the backend. The target-specific intrinsic returns the loaded value and
2211 /// is not responsible for masking and shifting the result.
2212 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
2213 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
2214 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
2215 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
2216 }
2217
2218 //===--------------------------------------------------------------------===//
2219 /// \name KCFI check lowering.
2220 /// @{
2221
2222 virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
2223 MachineBasicBlock::instr_iterator &MBBI,
2224 const TargetInstrInfo *TII) const {
2225 llvm_unreachable("KCFI is not supported on this target");
2226 }
2227
2228 /// @}
2229
2230 /// Inserts in the IR a target-specific intrinsic specifying a fence.
2231 /// It is called by AtomicExpandPass before expanding an
2232 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
2233 /// if shouldInsertFencesForAtomic returns true.
2234 ///
2235 /// Inst is the original atomic instruction, prior to other expansions that
2236 /// may be performed.
2237 ///
2238 /// This function should either return a nullptr, or a pointer to an IR-level
2239 /// Instruction*. Even complex fence sequences can be represented by a
2240 /// single Instruction* through an intrinsic to be lowered later.
2241 ///
2242 /// The default implementation emits an IR fence before any release (or
2243 /// stronger) operation that stores, and after any acquire (or stronger)
2244 /// operation. This is generally a correct implementation, but backends may
2245 /// override if they wish to use alternative schemes (e.g. the PowerPC
2246 /// standard ABI uses a fence before a seq_cst load instead of after a
2247 /// seq_cst store).
2248 /// @{
2249 virtual Instruction *emitLeadingFence(IRBuilderBase &Builder,
2250 Instruction *Inst,
2251 AtomicOrdering Ord) const;
2252
2253 virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
2254 Instruction *Inst,
2255 AtomicOrdering Ord) const;
2256 /// @}
2257
2258 // Emits code that executes when the comparison result in the ll/sc
2259 // expansion of a cmpxchg instruction is such that the store-conditional will
2260 // not execute. This makes it possible to balance out the load-linked with
2261 // a dedicated instruction, if desired.
2262 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
2263 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
2264 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {}
2265
2266 /// Returns true if arguments should be sign-extended in lib calls.
2267 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
2268 return IsSigned;
2269 }
2270
2271 /// Returns true if arguments should be extended in lib calls.
2272 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
2273 return true;
2274 }
2275
2276 /// Returns how the given (atomic) load should be expanded by the
2277 /// IR-level AtomicExpand pass.
2278 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2279 return AtomicExpansionKind::None;
2280 }
2281
2282 /// Returns how the given (atomic) load should be cast by the IR-level
2283 /// AtomicExpand pass.
2284 virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const {
2285 if (LI->getType()->isFloatingPointTy())
2286 return AtomicExpansionKind::CastToInteger;
2287 return AtomicExpansionKind::None;
2288 }
2289
2290 /// Returns how the given (atomic) store should be expanded by the IR-level
2291 /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try
2292 /// to use an atomicrmw xchg.
2293 virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const {
2294 return AtomicExpansionKind::None;
2295 }
2296
2297 /// Returns how the given (atomic) store should be cast by the IR-level
2298 /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger
2299 /// will try to cast the operands to integer values.
2300 virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const {
2301 if (SI->getValueOperand()->getType()->isFloatingPointTy())
2302 return AtomicExpansionKind::CastToInteger;
2303 return AtomicExpansionKind::None;
2304 }
2305
2306 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
2307 /// AtomicExpand pass.
2308 virtual AtomicExpansionKind
2309 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
2310 return AtomicExpansionKind::None;
2311 }
2312
2313 /// Returns how the IR-level AtomicExpand pass should expand the given
2314 /// AtomicRMW, if at all. Default is to never expand.
2315 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2316 return RMW->isFloatingPointOperation() ?
2317 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
2318 }
2319
2320 /// Returns how the given atomic atomicrmw should be cast by the IR-level
2321 /// AtomicExpand pass.
2322 virtual AtomicExpansionKind
2323 shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const {
2324 if (RMWI->getOperation() == AtomicRMWInst::Xchg &&
2325 (RMWI->getValOperand()->getType()->isFloatingPointTy() ||
2326 RMWI->getValOperand()->getType()->isPointerTy()))
2327 return AtomicExpansionKind::CastToInteger;
2328
2329 return AtomicExpansionKind::None;
2330 }
2331
2332 /// On some platforms, an AtomicRMW that never actually modifies the value
2333 /// (such as fetch_add of 0) can be turned into a fence followed by an
2334 /// atomic load. This may sound useless, but it makes it possible for the
2335 /// processor to keep the cacheline shared, dramatically improving
2336 /// performance. And such idempotent RMWs are useful for implementing some
2337 /// kinds of locks, see for example (justification + benchmarks):
2338 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
2339 /// This method tries doing that transformation, returning the atomic load if
2340 /// it succeeds, and nullptr otherwise.
2341 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
2342 /// another round of expansion.
2343 virtual LoadInst *
2344 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
2345 return nullptr;
2346 }
2347
2348 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
2349 /// SIGN_EXTEND, or ANY_EXTEND).
2350 virtual ISD::NodeType getExtendForAtomicOps() const {
2351 return ISD::ZERO_EXTEND;
2352 }
2353
2354 /// Returns how the platform's atomic compare and swap expects its comparison
2355 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
2356 /// separate from getExtendForAtomicOps, which is concerned with the
2357 /// sign-extension of the instruction's output, whereas here we are concerned
2358 /// with the sign-extension of the input. For targets with compare-and-swap
2359 /// instructions (or sub-word comparisons in their LL/SC loop expansions),
2360 /// the input can be ANY_EXTEND, but the output will still have a specific
2361 /// extension.
2362 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
2363 return ISD::ANY_EXTEND;
2364 }
2365
2366 /// @}
2367
2368 /// Returns true if we should normalize
2369 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2370 /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely
2371 /// that it saves us from materializing N0 and N1 in an integer register.
2372 /// Targets that are able to perform and/or on flags should return false here.
2373 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2374 EVT VT) const {
2375 // If a target has multiple condition registers, then it likely has logical
2376 // operations on those registers.
2377 if (hasMultipleConditionRegisters())
2378 return false;
2379 // Only do the transform if the value won't be split into multiple
2380 // registers.
2381 LegalizeTypeAction Action = getTypeAction(Context, VT);
2382 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
2383 Action != TypeSplitVector;
2384 }
2385
2386 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
2387
2388 /// Return true if a select of constants (select Cond, C1, C2) should be
2389 /// transformed into simple math ops with the condition value. For example:
2390 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
2391 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
2392 return false;
2393 }
2394
2395 /// Return true if it is profitable to transform an integer
2396 /// multiplication-by-constant into simpler operations like shifts and adds.
2397 /// This may be true if the target does not directly support the
2398 /// multiplication operation for the specified type or the sequence of simpler
2399 /// ops is faster than the multiply.
2400 virtual bool decomposeMulByConstant(LLVMContext &Context,
2401 EVT VT, SDValue C) const {
2402 return false;
2403 }
2404
2405 /// Return true if it may be profitable to transform
2406 /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
2407 /// This may not be true if c1 and c2 can be represented as immediates but
2408 /// c1*c2 cannot, for example.
2409 /// The target should check if c1, c2 and c1*c2 can be represented as
2410 /// immediates, or have to be materialized into registers. If it is not sure
2411 /// about some cases, a default true can be returned to let the DAGCombiner
2412 /// decide.
2413 /// AddNode is (add x, c1), and ConstNode is c2.
2414 virtual bool isMulAddWithConstProfitable(SDValue AddNode,
2415 SDValue ConstNode) const {
2416 return true;
2417 }
2418
2419 /// Return true if it is more correct/profitable to use strict FP_TO_INT
2420 /// conversion operations - canonicalizing the FP source value instead of
2421 /// converting all cases and then selecting based on value.
2422 /// This may be true if the target throws exceptions for out of bounds
2423 /// conversions or has fast FP CMOV.
2424 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
2425 bool IsSigned) const {
2426 return false;
2427 }
2428
2429 /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic.
2430 /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always
2431 /// considered beneficial.
2432 /// If optimizing for size, expansion is only considered beneficial for upto
2433 /// 5 multiplies and a divide (if the exponent is negative).
2434 bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const {
2435 if (Exponent < 0)
2436 Exponent = -Exponent;
2437 uint64_t E = static_cast<uint64_t>(Exponent);
2438 return !OptForSize || (llvm::popcount(Value: E) + Log2_64(Value: E) < 7);
2439 }
2440
2441 //===--------------------------------------------------------------------===//
2442 // TargetLowering Configuration Methods - These methods should be invoked by
2443 // the derived class constructor to configure this object for the target.
2444 //
2445protected:
2446 /// Specify how the target extends the result of integer and floating point
2447 /// boolean values from i1 to a wider type. See getBooleanContents.
2448 void setBooleanContents(BooleanContent Ty) {
2449 BooleanContents = Ty;
2450 BooleanFloatContents = Ty;
2451 }
2452
2453 /// Specify how the target extends the result of integer and floating point
2454 /// boolean values from i1 to a wider type. See getBooleanContents.
2455 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
2456 BooleanContents = IntTy;
2457 BooleanFloatContents = FloatTy;
2458 }
2459
2460 /// Specify how the target extends the result of a vector boolean value from a
2461 /// vector of i1 to a wider type. See getBooleanContents.
2462 void setBooleanVectorContents(BooleanContent Ty) {
2463 BooleanVectorContents = Ty;
2464 }
2465
2466 /// Specify the target scheduling preference.
2467 void setSchedulingPreference(Sched::Preference Pref) {
2468 SchedPreferenceInfo = Pref;
2469 }
2470
2471 /// Indicate the minimum number of blocks to generate jump tables.
2472 void setMinimumJumpTableEntries(unsigned Val);
2473
2474 /// Indicate the maximum number of entries in jump tables.
2475 /// Set to zero to generate unlimited jump tables.
2476 void setMaximumJumpTableSize(unsigned);
2477
2478 /// If set to a physical register, this specifies the register that
2479 /// llvm.savestack/llvm.restorestack should save and restore.
2480 void setStackPointerRegisterToSaveRestore(Register R) {
2481 StackPointerRegisterToSaveRestore = R;
2482 }
2483
2484 /// Tells the code generator that the target has multiple (allocatable)
2485 /// condition registers that can be used to store the results of comparisons
2486 /// for use by selects and conditional branches. With multiple condition
2487 /// registers, the code generator will not aggressively sink comparisons into
2488 /// the blocks of their users.
2489 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2490 HasMultipleConditionRegisters = hasManyRegs;
2491 }
2492
2493 /// Tells the code generator that the target has BitExtract instructions.
2494 /// The code generator will aggressively sink "shift"s into the blocks of
2495 /// their users if the users will generate "and" instructions which can be
2496 /// combined with "shift" to BitExtract instructions.
2497 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2498 HasExtractBitsInsn = hasExtractInsn;
2499 }
2500
2501 /// Tells the code generator not to expand logic operations on comparison
2502 /// predicates into separate sequences that increase the amount of flow
2503 /// control.
2504 void setJumpIsExpensive(bool isExpensive = true);
2505
2506 /// Tells the code generator which bitwidths to bypass.
2507 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2508 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2509 }
2510
2511 /// Add the specified register class as an available regclass for the
2512 /// specified value type. This indicates the selector can handle values of
2513 /// that class natively.
2514 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2515 assert((unsigned)VT.SimpleTy < std::size(RegClassForVT));
2516 RegClassForVT[VT.SimpleTy] = RC;
2517 }
2518
2519 /// Return the largest legal super-reg register class of the register class
2520 /// for the specified type and its associated "cost".
2521 virtual std::pair<const TargetRegisterClass *, uint8_t>
2522 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2523
2524 /// Once all of the register classes are added, this allows us to compute
2525 /// derived properties we expose.
2526 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2527
2528 /// Indicate that the specified operation does not work with the specified
2529 /// type and indicate what to do about it. Note that VT may refer to either
2530 /// the type of a result or that of an operand of Op.
2531 void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
2532 assert(Op < std::size(OpActions[0]) && "Table isn't big enough!");
2533 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2534 }
2535 void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
2536 LegalizeAction Action) {
2537 for (auto Op : Ops)
2538 setOperationAction(Op, VT, Action);
2539 }
2540 void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs,
2541 LegalizeAction Action) {
2542 for (auto VT : VTs)
2543 setOperationAction(Ops, VT, Action);
2544 }
2545
2546 /// Indicate that the specified load with extension does not work with the
2547 /// specified type and indicate what to do about it.
2548 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2549 LegalizeAction Action) {
2550 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2551 MemVT.isValid() && "Table isn't big enough!");
2552 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2553 unsigned Shift = 4 * ExtType;
2554 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2555 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2556 }
2557 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2558 LegalizeAction Action) {
2559 for (auto ExtType : ExtTypes)
2560 setLoadExtAction(ExtType, ValVT, MemVT, Action);
2561 }
2562 void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2563 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2564 for (auto MemVT : MemVTs)
2565 setLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2566 }
2567
2568 /// Let target indicate that an extending atomic load of the specified type
2569 /// is legal.
2570 void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2571 LegalizeAction Action) {
2572 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2573 MemVT.isValid() && "Table isn't big enough!");
2574 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2575 unsigned Shift = 4 * ExtType;
2576 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &=
2577 ~((uint16_t)0xF << Shift);
2578 AtomicLoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |=
2579 ((uint16_t)Action << Shift);
2580 }
2581 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
2582 LegalizeAction Action) {
2583 for (auto ExtType : ExtTypes)
2584 setAtomicLoadExtAction(ExtType, ValVT, MemVT, Action);
2585 }
2586 void setAtomicLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT,
2587 ArrayRef<MVT> MemVTs, LegalizeAction Action) {
2588 for (auto MemVT : MemVTs)
2589 setAtomicLoadExtAction(ExtTypes, ValVT, MemVT, Action);
2590 }
2591
2592 /// Indicate that the specified truncating store does not work with the
2593 /// specified type and indicate what to do about it.
2594 void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
2595 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2596 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2597 }
2598
2599 /// Indicate that the specified indexed load does or does not work with the
2600 /// specified type and indicate what to do abort it.
2601 ///
2602 /// NOTE: All indexed mode loads are initialized to Expand in
2603 /// TargetLowering.cpp
2604 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT,
2605 LegalizeAction Action) {
2606 for (auto IdxMode : IdxModes)
2607 setIndexedModeAction(IdxMode, VT, Shift: IMAB_Load, Action);
2608 }
2609
2610 void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2611 LegalizeAction Action) {
2612 for (auto VT : VTs)
2613 setIndexedLoadAction(IdxModes, VT, Action);
2614 }
2615
2616 /// Indicate that the specified indexed store does or does not work with the
2617 /// specified type and indicate what to do about it.
2618 ///
2619 /// NOTE: All indexed mode stores are initialized to Expand in
2620 /// TargetLowering.cpp
2621 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT,
2622 LegalizeAction Action) {
2623 for (auto IdxMode : IdxModes)
2624 setIndexedModeAction(IdxMode, VT, Shift: IMAB_Store, Action);
2625 }
2626
2627 void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs,
2628 LegalizeAction Action) {
2629 for (auto VT : VTs)
2630 setIndexedStoreAction(IdxModes, VT, Action);
2631 }
2632
2633 /// Indicate that the specified indexed masked load does or does not work with
2634 /// the specified type and indicate what to do about it.
2635 ///
2636 /// NOTE: All indexed mode masked loads are initialized to Expand in
2637 /// TargetLowering.cpp
2638 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2639 LegalizeAction Action) {
2640 setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad, Action);
2641 }
2642
2643 /// Indicate that the specified indexed masked store does or does not work
2644 /// with the specified type and indicate what to do about it.
2645 ///
2646 /// NOTE: All indexed mode masked stores are initialized to Expand in
2647 /// TargetLowering.cpp
2648 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2649 LegalizeAction Action) {
2650 setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore, Action);
2651 }
2652
2653 /// Indicate that the specified condition code is or isn't supported on the
2654 /// target and indicate what to do about it.
2655 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT,
2656 LegalizeAction Action) {
2657 for (auto CC : CCs) {
2658 assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) &&
2659 "Table isn't big enough!");
2660 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2661 /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the
2662 /// 32-bit value and the upper 29 bits index into the second dimension of
2663 /// the array to select what 32-bit value to use.
2664 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2665 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2666 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2667 }
2668 }
2669 void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs,
2670 LegalizeAction Action) {
2671 for (auto VT : VTs)
2672 setCondCodeAction(CCs, VT, Action);
2673 }
2674
2675 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2676 /// to trying a larger integer/fp until it can find one that works. If that
2677 /// default is insufficient, this method can be used by the target to override
2678 /// the default.
2679 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2680 PromoteToType[std::make_pair(x&: Opc, y&: OrigVT.SimpleTy)] = DestVT.SimpleTy;
2681 }
2682
2683 /// Convenience method to set an operation to Promote and specify the type
2684 /// in a single call.
2685 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2686 setOperationAction(Op: Opc, VT: OrigVT, Action: Promote);
2687 AddPromotedToType(Opc, OrigVT, DestVT);
2688 }
2689 void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT,
2690 MVT DestVT) {
2691 for (auto Op : Ops) {
2692 setOperationAction(Op, VT: OrigVT, Action: Promote);
2693 AddPromotedToType(Opc: Op, OrigVT, DestVT);
2694 }
2695 }
2696
2697 /// Targets should invoke this method for each target independent node that
2698 /// they want to provide a custom DAG combiner for by implementing the
2699 /// PerformDAGCombine virtual method.
2700 void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) {
2701 for (auto NT : NTs) {
2702 assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
2703 TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7);
2704 }
2705 }
2706
2707 /// Set the target's minimum function alignment.
2708 void setMinFunctionAlignment(Align Alignment) {
2709 MinFunctionAlignment = Alignment;
2710 }
2711
2712 /// Set the target's preferred function alignment. This should be set if
2713 /// there is a performance benefit to higher-than-minimum alignment
2714 void setPrefFunctionAlignment(Align Alignment) {
2715 PrefFunctionAlignment = Alignment;
2716 }
2717
2718 /// Set the target's preferred loop alignment. Default alignment is one, it
2719 /// means the target does not care about loop alignment. The target may also
2720 /// override getPrefLoopAlignment to provide per-loop values.
2721 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2722 void setMaxBytesForAlignment(unsigned MaxBytes) {
2723 MaxBytesForAlignment = MaxBytes;
2724 }
2725
2726 /// Set the minimum stack alignment of an argument.
2727 void setMinStackArgumentAlignment(Align Alignment) {
2728 MinStackArgumentAlignment = Alignment;
2729 }
2730
2731 /// Set the maximum atomic operation size supported by the
2732 /// backend. Atomic operations greater than this size (as well as
2733 /// ones that are not naturally aligned), will be expanded by
2734 /// AtomicExpandPass into an __atomic_* library call.
2735 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2736 MaxAtomicSizeInBitsSupported = SizeInBits;
2737 }
2738
2739 /// Set the size in bits of the maximum div/rem the backend supports.
2740 /// Larger operations will be expanded by ExpandLargeDivRem.
2741 void setMaxDivRemBitWidthSupported(unsigned SizeInBits) {
2742 MaxDivRemBitWidthSupported = SizeInBits;
2743 }
2744
2745 /// Set the size in bits of the maximum fp convert the backend supports.
2746 /// Larger operations will be expanded by ExpandLargeFPConvert.
2747 void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
2748 MaxLargeFPConvertBitWidthSupported = SizeInBits;
2749 }
2750
2751 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2752 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2753 MinCmpXchgSizeInBits = SizeInBits;
2754 }
2755
2756 /// Sets whether unaligned atomic operations are supported.
2757 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2758 SupportsUnalignedAtomics = UnalignedSupported;
2759 }
2760
2761public:
2762 //===--------------------------------------------------------------------===//
2763 // Addressing mode description hooks (used by LSR etc).
2764 //
2765
2766 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2767 /// instructions reading the address. This allows as much computation as
2768 /// possible to be done in the address mode for that operand. This hook lets
2769 /// targets also pass back when this should be done on intrinsics which
2770 /// load/store.
2771 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2772 SmallVectorImpl<Value*> &/*Ops*/,
2773 Type *&/*AccessTy*/) const {
2774 return false;
2775 }
2776
2777 /// This represents an addressing mode of:
2778 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale
2779 /// If BaseGV is null, there is no BaseGV.
2780 /// If BaseOffs is zero, there is no base offset.
2781 /// If HasBaseReg is false, there is no base register.
2782 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2783 /// no scale.
2784 /// If ScalableOffset is zero, there is no scalable offset.
2785 struct AddrMode {
2786 GlobalValue *BaseGV = nullptr;
2787 int64_t BaseOffs = 0;
2788 bool HasBaseReg = false;
2789 int64_t Scale = 0;
2790 int64_t ScalableOffset = 0;
2791 AddrMode() = default;
2792 };
2793
2794 /// Return true if the addressing mode represented by AM is legal for this
2795 /// target, for a load/store of the specified type.
2796 ///
2797 /// The type may be VoidTy, in which case only return true if the addressing
2798 /// mode is legal for a load/store of any legal type. TODO: Handle
2799 /// pre/postinc as well.
2800 ///
2801 /// If the address space cannot be determined, it will be -1.
2802 ///
2803 /// TODO: Remove default argument
2804 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2805 Type *Ty, unsigned AddrSpace,
2806 Instruction *I = nullptr) const;
2807
2808 /// Returns true if the targets addressing mode can target thread local
2809 /// storage (TLS).
2810 virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
2811 return false;
2812 }
2813
2814 /// Return the prefered common base offset.
2815 virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
2816 int64_t MaxOffset) const {
2817 return 0;
2818 }
2819
2820 /// Return true if the specified immediate is legal icmp immediate, that is
2821 /// the target has icmp instructions which can compare a register against the
2822 /// immediate without having to materialize the immediate into a register.
2823 virtual bool isLegalICmpImmediate(int64_t) const {
2824 return true;
2825 }
2826
2827 /// Return true if the specified immediate is legal add immediate, that is the
2828 /// target has add instructions which can add a register with the immediate
2829 /// without having to materialize the immediate into a register.
2830 virtual bool isLegalAddImmediate(int64_t) const {
2831 return true;
2832 }
2833
2834 /// Return true if adding the specified scalable immediate is legal, that is
2835 /// the target has add instructions which can add a register with the
2836 /// immediate (multiplied by vscale) without having to materialize the
2837 /// immediate into a register.
2838 virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
2839
2840 /// Return true if the specified immediate is legal for the value input of a
2841 /// store instruction.
2842 virtual bool isLegalStoreImmediate(int64_t Value) const {
2843 // Default implementation assumes that at least 0 works since it is likely
2844 // that a zero register exists or a zero immediate is allowed.
2845 return Value == 0;
2846 }
2847
2848 /// Return true if it's significantly cheaper to shift a vector by a uniform
2849 /// scalar than by an amount which will vary across each lane. On x86 before
2850 /// AVX2 for example, there is a "psllw" instruction for the former case, but
2851 /// no simple instruction for a general "a << b" operation on vectors.
2852 /// This should also apply to lowering for vector funnel shifts (rotates).
2853 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2854 return false;
2855 }
2856
2857 /// Given a shuffle vector SVI representing a vector splat, return a new
2858 /// scalar type of size equal to SVI's scalar type if the new type is more
2859 /// profitable. Returns nullptr otherwise. For example under MVE float splats
2860 /// are converted to integer to prevent the need to move from SPR to GPR
2861 /// registers.
2862 virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
2863 return nullptr;
2864 }
2865
2866 /// Given a set in interconnected phis of type 'From' that are loaded/stored
2867 /// or bitcast to type 'To', return true if the set should be converted to
2868 /// 'To'.
2869 virtual bool shouldConvertPhiType(Type *From, Type *To) const {
2870 return (From->isIntegerTy() || From->isFloatingPointTy()) &&
2871 (To->isIntegerTy() || To->isFloatingPointTy());
2872 }
2873
2874 /// Returns true if the opcode is a commutative binary operation.
2875 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2876 // FIXME: This should get its info from the td file.
2877 switch (Opcode) {
2878 case ISD::ADD:
2879 case ISD::SMIN:
2880 case ISD::SMAX:
2881 case ISD::UMIN:
2882 case ISD::UMAX:
2883 case ISD::MUL:
2884 case ISD::MULHU:
2885 case ISD::MULHS:
2886 case ISD::SMUL_LOHI:
2887 case ISD::UMUL_LOHI:
2888 case ISD::FADD:
2889 case ISD::FMUL:
2890 case ISD::AND:
2891 case ISD::OR:
2892 case ISD::XOR:
2893 case ISD::SADDO:
2894 case ISD::UADDO:
2895 case ISD::ADDC:
2896 case ISD::ADDE:
2897 case ISD::SADDSAT:
2898 case ISD::UADDSAT:
2899 case ISD::FMINNUM:
2900 case ISD::FMAXNUM:
2901 case ISD::FMINNUM_IEEE:
2902 case ISD::FMAXNUM_IEEE:
2903 case ISD::FMINIMUM:
2904 case ISD::FMAXIMUM:
2905 case ISD::AVGFLOORS:
2906 case ISD::AVGFLOORU:
2907 case ISD::AVGCEILS:
2908 case ISD::AVGCEILU:
2909 case ISD::ABDS:
2910 case ISD::ABDU:
2911 return true;
2912 default: return false;
2913 }
2914 }
2915
2916 /// Return true if the node is a math/logic binary operator.
2917 virtual bool isBinOp(unsigned Opcode) const {
2918 // A commutative binop must be a binop.
2919 if (isCommutativeBinOp(Opcode))
2920 return true;
2921 // These are non-commutative binops.
2922 switch (Opcode) {
2923 case ISD::SUB:
2924 case ISD::SHL:
2925 case ISD::SRL:
2926 case ISD::SRA:
2927 case ISD::ROTL:
2928 case ISD::ROTR:
2929 case ISD::SDIV:
2930 case ISD::UDIV:
2931 case ISD::SREM:
2932 case ISD::UREM:
2933 case ISD::SSUBSAT:
2934 case ISD::USUBSAT:
2935 case ISD::FSUB:
2936 case ISD::FDIV:
2937 case ISD::FREM:
2938 return true;
2939 default:
2940 return false;
2941 }
2942 }
2943
2944 /// Return true if it's free to truncate a value of type FromTy to type
2945 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2946 /// by referencing its sub-register AX.
2947 /// Targets must return false when FromTy <= ToTy.
2948 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2949 return false;
2950 }
2951
2952 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2953 /// whether a call is in tail position. Typically this means that both results
2954 /// would be assigned to the same register or stack slot, but it could mean
2955 /// the target performs adequate checks of its own before proceeding with the
2956 /// tail call. Targets must return false when FromTy <= ToTy.
2957 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2958 return false;
2959 }
2960
2961 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
2962 virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
2963 LLVMContext &Ctx) const {
2964 return isTruncateFree(FromVT: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx),
2965 ToVT: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx));
2966 }
2967
2968 /// Return true if truncating the specific node Val to type VT2 is free.
2969 virtual bool isTruncateFree(SDValue Val, EVT VT2) const {
2970 // Fallback to type matching.
2971 return isTruncateFree(FromVT: Val.getValueType(), ToVT: VT2);
2972 }
2973
2974 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2975
2976 /// Return true if the extension represented by \p I is free.
2977 /// Unlikely the is[Z|FP]ExtFree family which is based on types,
2978 /// this method can use the context provided by \p I to decide
2979 /// whether or not \p I is free.
2980 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2981 /// In other words, if is[Z|FP]Free returns true, then this method
2982 /// returns true as well. The converse is not true.
2983 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2984 /// \pre \p I must be a sign, zero, or fp extension.
2985 bool isExtFree(const Instruction *I) const {
2986 switch (I->getOpcode()) {
2987 case Instruction::FPExt:
2988 if (isFPExtFree(DestVT: EVT::getEVT(Ty: I->getType()),
2989 SrcVT: EVT::getEVT(Ty: I->getOperand(i: 0)->getType())))
2990 return true;
2991 break;
2992 case Instruction::ZExt:
2993 if (isZExtFree(FromTy: I->getOperand(i: 0)->getType(), ToTy: I->getType()))
2994 return true;
2995 break;
2996 case Instruction::SExt:
2997 break;
2998 default:
2999 llvm_unreachable("Instruction is not an extension");
3000 }
3001 return isExtFreeImpl(I);
3002 }
3003
3004 /// Return true if \p Load and \p Ext can form an ExtLoad.
3005 /// For example, in AArch64
3006 /// %L = load i8, i8* %ptr
3007 /// %E = zext i8 %L to i32
3008 /// can be lowered into one load instruction
3009 /// ldrb w0, [x0]
3010 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
3011 const DataLayout &DL) const {
3012 EVT VT = getValueType(DL, Ty: Ext->getType());
3013 EVT LoadVT = getValueType(DL, Ty: Load->getType());
3014
3015 // If the load has other users and the truncate is not free, the ext
3016 // probably isn't free.
3017 if (!Load->hasOneUse() && (isTypeLegal(VT: LoadVT) || !isTypeLegal(VT)) &&
3018 !isTruncateFree(FromTy: Ext->getType(), ToTy: Load->getType()))
3019 return false;
3020
3021 // Check whether the target supports casts folded into loads.
3022 unsigned LType;
3023 if (isa<ZExtInst>(Val: Ext))
3024 LType = ISD::ZEXTLOAD;
3025 else {
3026 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
3027 LType = ISD::SEXTLOAD;
3028 }
3029
3030 return isLoadExtLegal(ExtType: LType, ValVT: VT, MemVT: LoadVT);
3031 }
3032
3033 /// Return true if any actual instruction that defines a value of type FromTy
3034 /// implicitly zero-extends the value to ToTy in the result register.
3035 ///
3036 /// The function should return true when it is likely that the truncate can
3037 /// be freely folded with an instruction defining a value of FromTy. If
3038 /// the defining instruction is unknown (because you're looking at a
3039 /// function argument, PHI, etc.) then the target may require an
3040 /// explicit truncate, which is not necessarily free, but this function
3041 /// does not deal with those cases.
3042 /// Targets must return false when FromTy >= ToTy.
3043 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
3044 return false;
3045 }
3046
3047 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
3048 virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
3049 LLVMContext &Ctx) const {
3050 return isZExtFree(FromTy: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx),
3051 ToTy: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx));
3052 }
3053
3054 /// Return true if zero-extending the specific node Val to type VT2 is free
3055 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
3056 /// because it's folded such as X86 zero-extending loads).
3057 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
3058 return isZExtFree(FromTy: Val.getValueType(), ToTy: VT2);
3059 }
3060
3061 /// Return true if sign-extension from FromTy to ToTy is cheaper than
3062 /// zero-extension.
3063 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
3064 return false;
3065 }
3066
3067 /// Return true if this constant should be sign extended when promoting to
3068 /// a larger type.
3069 virtual bool signExtendConstant(const ConstantInt *C) const { return false; }
3070
3071 /// Return true if sinking I's operands to the same basic block as I is
3072 /// profitable, e.g. because the operands can be folded into a target
3073 /// instruction during instruction selection. After calling the function
3074 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
3075 /// come first).
3076 virtual bool shouldSinkOperands(Instruction *I,
3077 SmallVectorImpl<Use *> &Ops) const {
3078 return false;
3079 }
3080
3081 /// Try to optimize extending or truncating conversion instructions (like
3082 /// zext, trunc, fptoui, uitofp) for the target.
3083 virtual bool
3084 optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
3085 const TargetTransformInfo &TTI) const {
3086 return false;
3087 }
3088
3089 /// Return true if the target supplies and combines to a paired load
3090 /// two loaded values of type LoadedType next to each other in memory.
3091 /// RequiredAlignment gives the minimal alignment constraints that must be met
3092 /// to be able to select this paired load.
3093 ///
3094 /// This information is *not* used to generate actual paired loads, but it is
3095 /// used to generate a sequence of loads that is easier to combine into a
3096 /// paired load.
3097 /// For instance, something like this:
3098 /// a = load i64* addr
3099 /// b = trunc i64 a to i32
3100 /// c = lshr i64 a, 32
3101 /// d = trunc i64 c to i32
3102 /// will be optimized into:
3103 /// b = load i32* addr1
3104 /// d = load i32* addr2
3105 /// Where addr1 = addr2 +/- sizeof(i32).
3106 ///
3107 /// In other words, unless the target performs a post-isel load combining,
3108 /// this information should not be provided because it will generate more
3109 /// loads.
3110 virtual bool hasPairedLoad(EVT /*LoadedType*/,
3111 Align & /*RequiredAlignment*/) const {
3112 return false;
3113 }
3114
3115 /// Return true if the target has a vector blend instruction.
3116 virtual bool hasVectorBlend() const { return false; }
3117
3118 /// Get the maximum supported factor for interleaved memory accesses.
3119 /// Default to be the minimum interleave factor: 2.
3120 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
3121
3122 /// Lower an interleaved load to target specific intrinsics. Return
3123 /// true on success.
3124 ///
3125 /// \p LI is the vector load instruction.
3126 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
3127 /// \p Indices is the corresponding indices for each shufflevector.
3128 /// \p Factor is the interleave factor.
3129 virtual bool lowerInterleavedLoad(LoadInst *LI,
3130 ArrayRef<ShuffleVectorInst *> Shuffles,
3131 ArrayRef<unsigned> Indices,
3132 unsigned Factor) const {
3133 return false;
3134 }
3135
3136 /// Lower an interleaved store to target specific intrinsics. Return
3137 /// true on success.
3138 ///
3139 /// \p SI is the vector store instruction.
3140 /// \p SVI is the shufflevector to RE-interleave the stored vector.
3141 /// \p Factor is the interleave factor.
3142 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
3143 unsigned Factor) const {
3144 return false;
3145 }
3146
3147 /// Lower a deinterleave intrinsic to a target specific load intrinsic.
3148 /// Return true on success. Currently only supports
3149 /// llvm.experimental.vector.deinterleave2
3150 ///
3151 /// \p DI is the deinterleave intrinsic.
3152 /// \p LI is the accompanying load instruction
3153 virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
3154 LoadInst *LI) const {
3155 return false;
3156 }
3157
3158 /// Lower an interleave intrinsic to a target specific store intrinsic.
3159 /// Return true on success. Currently only supports
3160 /// llvm.experimental.vector.interleave2
3161 ///
3162 /// \p II is the interleave intrinsic.
3163 /// \p SI is the accompanying store instruction
3164 virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
3165 StoreInst *SI) const {
3166 return false;
3167 }
3168
3169 /// Return true if an fpext operation is free (for instance, because
3170 /// single-precision floating-point numbers are implicitly extended to
3171 /// double-precision).
3172 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
3173 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
3174 "invalid fpext types");
3175 return false;
3176 }
3177
3178 /// Return true if an fpext operation input to an \p Opcode operation is free
3179 /// (for instance, because half-precision floating-point numbers are
3180 /// implicitly extended to float-precision) for an FMA instruction.
3181 virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
3182 LLT DestTy, LLT SrcTy) const {
3183 return false;
3184 }
3185
3186 /// Return true if an fpext operation input to an \p Opcode operation is free
3187 /// (for instance, because half-precision floating-point numbers are
3188 /// implicitly extended to float-precision) for an FMA instruction.
3189 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
3190 EVT DestVT, EVT SrcVT) const {
3191 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
3192 "invalid fpext types");
3193 return isFPExtFree(DestVT, SrcVT);
3194 }
3195
3196 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
3197 /// extend node) is profitable.
3198 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
3199
3200 /// Return true if an fneg operation is free to the point where it is never
3201 /// worthwhile to replace it with a bitwise operation.
3202 virtual bool isFNegFree(EVT VT) const {
3203 assert(VT.isFloatingPoint());
3204 return false;
3205 }
3206
3207 /// Return true if an fabs operation is free to the point where it is never
3208 /// worthwhile to replace it with a bitwise operation.
3209 virtual bool isFAbsFree(EVT VT) const {
3210 assert(VT.isFloatingPoint());
3211 return false;
3212 }
3213
3214 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3215 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3216 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3217 ///
3218 /// NOTE: This may be called before legalization on types for which FMAs are
3219 /// not legal, but should return true if those types will eventually legalize
3220 /// to types that support FMAs. After legalization, it will only be called on
3221 /// types that support FMAs (via Legal or Custom actions)
3222 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3223 EVT) const {
3224 return false;
3225 }
3226
3227 /// Return true if an FMA operation is faster than a pair of fmul and fadd
3228 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
3229 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
3230 ///
3231 /// NOTE: This may be called before legalization on types for which FMAs are
3232 /// not legal, but should return true if those types will eventually legalize
3233 /// to types that support FMAs. After legalization, it will only be called on
3234 /// types that support FMAs (via Legal or Custom actions)
3235 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3236 LLT) const {
3237 return false;
3238 }
3239
3240 /// IR version
3241 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
3242 return false;
3243 }
3244
3245 /// Returns true if \p MI can be combined with another instruction to
3246 /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD,
3247 /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be
3248 /// distributed into an fadd/fsub.
3249 virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
3250 assert((MI.getOpcode() == TargetOpcode::G_FADD ||
3251 MI.getOpcode() == TargetOpcode::G_FSUB ||
3252 MI.getOpcode() == TargetOpcode::G_FMUL) &&
3253 "unexpected node in FMAD forming combine");
3254 switch (Ty.getScalarSizeInBits()) {
3255 case 16:
3256 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
3257 case 32:
3258 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
3259 case 64:
3260 return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
3261 default:
3262 break;
3263 }
3264
3265 return false;
3266 }
3267
3268 /// Returns true if be combined with to form an ISD::FMAD. \p N may be an
3269 /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an
3270 /// fadd/fsub.
3271 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
3272 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
3273 N->getOpcode() == ISD::FMUL) &&
3274 "unexpected node in FMAD forming combine");
3275 return isOperationLegal(Op: ISD::FMAD, VT: N->getValueType(ResNo: 0));
3276 }
3277
3278 // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
3279 // than FMUL and ADD is delegated to the machine combiner.
3280 virtual bool generateFMAsInMachineCombiner(EVT VT,
3281 CodeGenOptLevel OptLevel) const {
3282 return false;
3283 }
3284
3285 /// Return true if it's profitable to narrow operations of type SrcVT to
3286 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
3287 /// i32 to i16.
3288 virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
3289 return false;
3290 }
3291
3292 /// Return true if pulling a binary operation into a select with an identity
3293 /// constant is profitable. This is the inverse of an IR transform.
3294 /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
3295 virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
3296 EVT VT) const {
3297 return false;
3298 }
3299
3300 /// Return true if it is beneficial to convert a load of a constant to
3301 /// just the constant itself.
3302 /// On some targets it might be more efficient to use a combination of
3303 /// arithmetic instructions to materialize the constant instead of loading it
3304 /// from a constant pool.
3305 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
3306 Type *Ty) const {
3307 return false;
3308 }
3309
3310 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
3311 /// from this source type with this index. This is needed because
3312 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
3313 /// the first element, and only the target knows which lowering is cheap.
3314 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
3315 unsigned Index) const {
3316 return false;
3317 }
3318
3319 /// Try to convert an extract element of a vector binary operation into an
3320 /// extract element followed by a scalar operation.
3321 virtual bool shouldScalarizeBinop(SDValue VecOp) const {
3322 return false;
3323 }
3324
3325 /// Return true if extraction of a scalar element from the given vector type
3326 /// at the given index is cheap. For example, if scalar operations occur on
3327 /// the same register file as vector operations, then an extract element may
3328 /// be a sub-register rename rather than an actual instruction.
3329 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
3330 return false;
3331 }
3332
3333 /// Try to convert math with an overflow comparison into the corresponding DAG
3334 /// node operation. Targets may want to override this independently of whether
3335 /// the operation is legal/custom for the given type because it may obscure
3336 /// matching of other patterns.
3337 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
3338 bool MathUsed) const {
3339 // TODO: The default logic is inherited from code in CodeGenPrepare.
3340 // The opcode should not make a difference by default?
3341 if (Opcode != ISD::UADDO)
3342 return false;
3343
3344 // Allow the transform as long as we have an integer type that is not
3345 // obviously illegal and unsupported and if the math result is used
3346 // besides the overflow check. On some targets (e.g. SPARC), it is
3347 // not profitable to form on overflow op if the math result has no
3348 // concrete users.
3349 if (VT.isVector())
3350 return false;
3351 return MathUsed && (VT.isSimple() || !isOperationExpand(Op: Opcode, VT));
3352 }
3353
3354 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
3355 // even if the vector itself has multiple uses.
3356 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
3357 return false;
3358 }
3359
3360 // Return true if CodeGenPrepare should consider splitting large offset of a
3361 // GEP to make the GEP fit into the addressing mode and can be sunk into the
3362 // same blocks of its users.
3363 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
3364
3365 /// Return true if creating a shift of the type by the given
3366 /// amount is not profitable.
3367 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
3368 return false;
3369 }
3370
3371 // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
3372 // A) where y has a single bit set?
3373 virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
3374 const APInt &AndMask) const {
3375 unsigned ShCt = AndMask.getBitWidth() - 1;
3376 return !shouldAvoidTransformToShift(VT, Amount: ShCt);
3377 }
3378
3379 /// Does this target require the clearing of high-order bits in a register
3380 /// passed to the fp16 to fp conversion library function.
3381 virtual bool shouldKeepZExtForFP16Conv() const { return false; }
3382
3383 /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
3384 /// from min(max(fptoi)) saturation patterns.
3385 virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
3386 return isOperationLegalOrCustom(Op, VT);
3387 }
3388
3389 /// Does this target support complex deinterleaving
3390 virtual bool isComplexDeinterleavingSupported() const { return false; }
3391
3392 /// Does this target support complex deinterleaving with the given operation
3393 /// and type
3394 virtual bool isComplexDeinterleavingOperationSupported(
3395 ComplexDeinterleavingOperation Operation, Type *Ty) const {
3396 return false;
3397 }
3398
3399 /// Create the IR node for the given complex deinterleaving operation.
3400 /// If one cannot be created using all the given inputs, nullptr should be
3401 /// returned.
3402 virtual Value *createComplexDeinterleavingIR(
3403 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
3404 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
3405 Value *Accumulator = nullptr) const {
3406 return nullptr;
3407 }
3408
3409 //===--------------------------------------------------------------------===//
3410 // Runtime Library hooks
3411 //
3412
3413 /// Rename the default libcall routine name for the specified libcall.
3414 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
3415 LibcallRoutineNames[Call] = Name;
3416 }
3417 void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) {
3418 for (auto Call : Calls)
3419 setLibcallName(Call, Name);
3420 }
3421
3422 /// Get the libcall routine name for the specified libcall.
3423 const char *getLibcallName(RTLIB::Libcall Call) const {
3424 return LibcallRoutineNames[Call];
3425 }
3426
3427 /// Override the default CondCode to be used to test the result of the
3428 /// comparison libcall against zero.
3429 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
3430 CmpLibcallCCs[Call] = CC;
3431 }
3432
3433 /// Get the CondCode that's to be used to test the result of the comparison
3434 /// libcall against zero.
3435 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
3436 return CmpLibcallCCs[Call];
3437 }
3438
3439 /// Set the CallingConv that should be used for the specified libcall.
3440 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
3441 LibcallCallingConvs[Call] = CC;
3442 }
3443
3444 /// Get the CallingConv that should be used for the specified libcall.
3445 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
3446 return LibcallCallingConvs[Call];
3447 }
3448
3449 /// Execute target specific actions to finalize target lowering.
3450 /// This is used to set extra flags in MachineFrameInformation and freezing
3451 /// the set of reserved registers.
3452 /// The default implementation just freezes the set of reserved registers.
3453 virtual void finalizeLowering(MachineFunction &MF) const;
3454
3455 //===----------------------------------------------------------------------===//
3456 // GlobalISel Hooks
3457 //===----------------------------------------------------------------------===//
3458 /// Check whether or not \p MI needs to be moved close to its uses.
3459 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
3460
3461
3462private:
3463 const TargetMachine &TM;
3464
3465 /// Tells the code generator that the target has multiple (allocatable)
3466 /// condition registers that can be used to store the results of comparisons
3467 /// for use by selects and conditional branches. With multiple condition
3468 /// registers, the code generator will not aggressively sink comparisons into
3469 /// the blocks of their users.
3470 bool HasMultipleConditionRegisters;
3471
3472 /// Tells the code generator that the target has BitExtract instructions.
3473 /// The code generator will aggressively sink "shift"s into the blocks of
3474 /// their users if the users will generate "and" instructions which can be
3475 /// combined with "shift" to BitExtract instructions.
3476 bool HasExtractBitsInsn;
3477
3478 /// Tells the code generator to bypass slow divide or remainder
3479 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
3480 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
3481 /// div/rem when the operands are positive and less than 256.
3482 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
3483
3484 /// Tells the code generator that it shouldn't generate extra flow control
3485 /// instructions and should attempt to combine flow control instructions via
3486 /// predication.
3487 bool JumpIsExpensive;
3488
3489 /// Information about the contents of the high-bits in boolean values held in
3490 /// a type wider than i1. See getBooleanContents.
3491 BooleanContent BooleanContents;
3492
3493 /// Information about the contents of the high-bits in boolean values held in
3494 /// a type wider than i1. See getBooleanContents.
3495 BooleanContent BooleanFloatContents;
3496
3497 /// Information about the contents of the high-bits in boolean vector values
3498 /// when the element type is wider than i1. See getBooleanContents.
3499 BooleanContent BooleanVectorContents;
3500
3501 /// The target scheduling preference: shortest possible total cycles or lowest
3502 /// register usage.
3503 Sched::Preference SchedPreferenceInfo;
3504
3505 /// The minimum alignment that any argument on the stack needs to have.
3506 Align MinStackArgumentAlignment;
3507
3508 /// The minimum function alignment (used when optimizing for size, and to
3509 /// prevent explicitly provided alignment from leading to incorrect code).
3510 Align MinFunctionAlignment;
3511
3512 /// The preferred function alignment (used when alignment unspecified and
3513 /// optimizing for speed).
3514 Align PrefFunctionAlignment;
3515
3516 /// The preferred loop alignment (in log2 bot in bytes).
3517 Align PrefLoopAlignment;
3518 /// The maximum amount of bytes permitted to be emitted for alignment.
3519 unsigned MaxBytesForAlignment;
3520
3521 /// Size in bits of the maximum atomics size the backend supports.
3522 /// Accesses larger than this will be expanded by AtomicExpandPass.
3523 unsigned MaxAtomicSizeInBitsSupported;
3524
3525 /// Size in bits of the maximum div/rem size the backend supports.
3526 /// Larger operations will be expanded by ExpandLargeDivRem.
3527 unsigned MaxDivRemBitWidthSupported;
3528
3529 /// Size in bits of the maximum larget fp convert size the backend
3530 /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
3531 unsigned MaxLargeFPConvertBitWidthSupported;
3532
3533 /// Size in bits of the minimum cmpxchg or ll/sc operation the
3534 /// backend supports.
3535 unsigned MinCmpXchgSizeInBits;
3536
3537 /// This indicates if the target supports unaligned atomic operations.
3538 bool SupportsUnalignedAtomics;
3539
3540 /// If set to a physical register, this specifies the register that
3541 /// llvm.savestack/llvm.restorestack should save and restore.
3542 Register StackPointerRegisterToSaveRestore;
3543
3544 /// This indicates the default register class to use for each ValueType the
3545 /// target supports natively.
3546 const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE];
3547 uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE];
3548 MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE];
3549
3550 /// This indicates the "representative" register class to use for each
3551 /// ValueType the target supports natively. This information is used by the
3552 /// scheduler to track register pressure. By default, the representative
3553 /// register class is the largest legal super-reg register class of the
3554 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
3555 /// representative class would be GR32.
3556 const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0};
3557
3558 /// This indicates the "cost" of the "representative" register class for each
3559 /// ValueType. The cost is used by the scheduler to approximate register
3560 /// pressure.
3561 uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE];
3562
3563 /// For any value types we are promoting or expanding, this contains the value
3564 /// type that we are changing to. For Expanded types, this contains one step
3565 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
3566 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
3567 /// the same type (e.g. i32 -> i32).
3568 MVT TransformToType[MVT::VALUETYPE_SIZE];
3569
3570 /// For each operation and each value type, keep a LegalizeAction that
3571 /// indicates how instruction selection should deal with the operation. Most
3572 /// operations are Legal (aka, supported natively by the target), but
3573 /// operations that are not should be described. Note that operations on
3574 /// non-legal value types are not described here.
3575 LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END];
3576
3577 /// For each load extension type and each value type, keep a LegalizeAction
3578 /// that indicates how instruction selection should deal with a load of a
3579 /// specific value type and extension type. Uses 4-bits to store the action
3580 /// for each of the 4 load ext types.
3581 uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3582
3583 /// Similar to LoadExtActions, but for atomic loads. Only Legal or Expand
3584 /// (default) values are supported.
3585 uint16_t AtomicLoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3586
3587 /// For each value type pair keep a LegalizeAction that indicates whether a
3588 /// truncating store of a specific value type and truncating type is legal.
3589 LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE];
3590
3591 /// For each indexed mode and each value type, keep a quad of LegalizeAction
3592 /// that indicates how instruction selection should deal with the load /
3593 /// store / maskedload / maskedstore.
3594 ///
3595 /// The first dimension is the value_type for the reference. The second
3596 /// dimension represents the various modes for load store.
3597 uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE];
3598
3599 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
3600 /// indicates how instruction selection should deal with the condition code.
3601 ///
3602 /// Because each CC action takes up 4 bits, we need to have the array size be
3603 /// large enough to fit all of the value types. This can be done by rounding
3604 /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8.
3605 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8];
3606
3607 ValueTypeActionImpl ValueTypeActions;
3608
3609private:
3610 /// Targets can specify ISD nodes that they would like PerformDAGCombine
3611 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
3612 /// array.
3613 unsigned char
3614 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
3615
3616 /// For operations that must be promoted to a specific type, this holds the
3617 /// destination type. This map should be sparse, so don't hold it as an
3618 /// array.
3619 ///
3620 /// Targets add entries to this map with AddPromotedToType(..), clients access
3621 /// this with getTypeToPromoteTo(..).
3622 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
3623 PromoteToType;
3624
3625 /// Stores the name each libcall.
3626 const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
3627
3628 /// The ISD::CondCode that should be used to test the result of each of the
3629 /// comparison libcall against zero.
3630 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
3631
3632 /// Stores the CallingConv that should be used for each libcall.
3633 CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
3634
3635 /// Set default libcall names and calling conventions.
3636 void InitLibcalls(const Triple &TT);
3637
3638 /// The bits of IndexedModeActions used to store the legalisation actions
3639 /// We store the data as | ML | MS | L | S | each taking 4 bits.
3640 enum IndexedModeActionsBits {
3641 IMAB_Store = 0,
3642 IMAB_Load = 4,
3643 IMAB_MaskedStore = 8,
3644 IMAB_MaskedLoad = 12
3645 };
3646
3647 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
3648 LegalizeAction Action) {
3649 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
3650 (unsigned)Action < 0xf && "Table isn't big enough!");
3651 unsigned Ty = (unsigned)VT.SimpleTy;
3652 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
3653 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
3654 }
3655
3656 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
3657 unsigned Shift) const {
3658 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
3659 "Table isn't big enough!");
3660 unsigned Ty = (unsigned)VT.SimpleTy;
3661 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
3662 }
3663
3664protected:
3665 /// Return true if the extension represented by \p I is free.
3666 /// \pre \p I is a sign, zero, or fp extension and
3667 /// is[Z|FP]ExtFree of the related types is not true.
3668 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
3669
3670 /// Depth that GatherAllAliases should continue looking for chain
3671 /// dependencies when trying to find a more preferable chain. As an
3672 /// approximation, this should be more than the number of consecutive stores
3673 /// expected to be merged.
3674 unsigned GatherAllAliasesMaxDepth;
3675
3676 /// \brief Specify maximum number of store instructions per memset call.
3677 ///
3678 /// When lowering \@llvm.memset this field specifies the maximum number of
3679 /// store operations that may be substituted for the call to memset. Targets
3680 /// must set this value based on the cost threshold for that target. Targets
3681 /// should assume that the memset will be done using as many of the largest
3682 /// store operations first, followed by smaller ones, if necessary, per
3683 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
3684 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
3685 /// store. This only applies to setting a constant array of a constant size.
3686 unsigned MaxStoresPerMemset;
3687 /// Likewise for functions with the OptSize attribute.
3688 unsigned MaxStoresPerMemsetOptSize;
3689
3690 /// \brief Specify maximum number of store instructions per memcpy call.
3691 ///
3692 /// When lowering \@llvm.memcpy this field specifies the maximum number of
3693 /// store operations that may be substituted for a call to memcpy. Targets
3694 /// must set this value based on the cost threshold for that target. Targets
3695 /// should assume that the memcpy will be done using as many of the largest
3696 /// store operations first, followed by smaller ones, if necessary, per
3697 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
3698 /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store
3699 /// and one 1-byte store. This only applies to copying a constant array of
3700 /// constant size.
3701 unsigned MaxStoresPerMemcpy;
3702 /// Likewise for functions with the OptSize attribute.
3703 unsigned MaxStoresPerMemcpyOptSize;
3704 /// \brief Specify max number of store instructions to glue in inlined memcpy.
3705 ///
3706 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
3707 /// of store instructions to keep together. This helps in pairing and
3708 // vectorization later on.
3709 unsigned MaxGluedStoresPerMemcpy = 0;
3710
3711 /// \brief Specify maximum number of load instructions per memcmp call.
3712 ///
3713 /// When lowering \@llvm.memcmp this field specifies the maximum number of
3714 /// pairs of load operations that may be substituted for a call to memcmp.
3715 /// Targets must set this value based on the cost threshold for that target.
3716 /// Targets should assume that the memcmp will be done using as many of the
3717 /// largest load operations first, followed by smaller ones, if necessary, per
3718 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
3719 /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load
3720 /// and one 1-byte load. This only applies to copying a constant array of
3721 /// constant size.
3722 unsigned MaxLoadsPerMemcmp;
3723 /// Likewise for functions with the OptSize attribute.
3724 unsigned MaxLoadsPerMemcmpOptSize;
3725
3726 /// \brief Specify maximum number of store instructions per memmove call.
3727 ///
3728 /// When lowering \@llvm.memmove this field specifies the maximum number of
3729 /// store instructions that may be substituted for a call to memmove. Targets
3730 /// must set this value based on the cost threshold for that target. Targets
3731 /// should assume that the memmove will be done using as many of the largest
3732 /// store operations first, followed by smaller ones, if necessary, per
3733 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
3734 /// with 8-bit alignment would result in nine 1-byte stores. This only
3735 /// applies to copying a constant array of constant size.
3736 unsigned MaxStoresPerMemmove;
3737 /// Likewise for functions with the OptSize attribute.
3738 unsigned MaxStoresPerMemmoveOptSize;
3739
3740 /// Tells the code generator that select is more expensive than a branch if
3741 /// the branch is usually predicted right.
3742 bool PredictableSelectIsExpensive;
3743
3744 /// \see enableExtLdPromotion.
3745 bool EnableExtLdPromotion;
3746
3747 /// Return true if the value types that can be represented by the specified
3748 /// register class are all legal.
3749 bool isLegalRC(const TargetRegisterInfo &TRI,
3750 const TargetRegisterClass &RC) const;
3751
3752 /// Replace/modify any TargetFrameIndex operands with a targte-dependent
3753 /// sequence of memory operands that is recognized by PrologEpilogInserter.
3754 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
3755 MachineBasicBlock *MBB) const;
3756
3757 bool IsStrictFPEnabled;
3758};
3759
3760/// This class defines information used to lower LLVM code to legal SelectionDAG
3761/// operators that the target instruction selector can accept natively.
3762///
3763/// This class also defines callbacks that targets must implement to lower
3764/// target-specific constructs to SelectionDAG operators.
3765class TargetLowering : public TargetLoweringBase {
3766public:
3767 struct DAGCombinerInfo;
3768 struct MakeLibCallOptions;
3769
3770 TargetLowering(const TargetLowering &) = delete;
3771 TargetLowering &operator=(const TargetLowering &) = delete;
3772
3773 explicit TargetLowering(const TargetMachine &TM);
3774
3775 bool isPositionIndependent() const;
3776
3777 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
3778 FunctionLoweringInfo *FLI,
3779 UniformityInfo *UA) const {
3780 return false;
3781 }
3782
3783 // Lets target to control the following reassociation of operands: (op (op x,
3784 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By
3785 // default consider profitable any case where N0 has single use. This
3786 // behavior reflects the condition replaced by this target hook call in the
3787 // DAGCombiner. Any particular target can implement its own heuristic to
3788 // restrict common combiner.
3789 virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
3790 SDValue N1) const {
3791 return N0.hasOneUse();
3792 }
3793
3794 // Lets target to control the following reassociation of operands: (op (op x,
3795 // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By
3796 // default consider profitable any case where N0 has single use. This
3797 // behavior reflects the condition replaced by this target hook call in the
3798 // combiner. Any particular target can implement its own heuristic to
3799 // restrict common combiner.
3800 virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
3801 Register N1) const {
3802 return MRI.hasOneNonDBGUse(RegNo: N0);
3803 }
3804
3805 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
3806 return false;
3807 }
3808
3809 /// Returns true by value, base pointer and offset pointer and addressing mode
3810 /// by reference if the node's address can be legally represented as
3811 /// pre-indexed load / store address.
3812 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
3813 SDValue &/*Offset*/,
3814 ISD::MemIndexedMode &/*AM*/,
3815 SelectionDAG &/*DAG*/) const {
3816 return false;
3817 }
3818
3819 /// Returns true by value, base pointer and offset pointer and addressing mode
3820 /// by reference if this node can be combined with a load / store to form a
3821 /// post-indexed load / store.
3822 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
3823 SDValue &/*Base*/,
3824 SDValue &/*Offset*/,
3825 ISD::MemIndexedMode &/*AM*/,
3826 SelectionDAG &/*DAG*/) const {
3827 return false;
3828 }
3829
3830 /// Returns true if the specified base+offset is a legal indexed addressing
3831 /// mode for this target. \p MI is the load or store instruction that is being
3832 /// considered for transformation.
3833 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
3834 bool IsPre, MachineRegisterInfo &MRI) const {
3835 return false;
3836 }
3837
3838 /// Return the entry encoding for a jump table in the current function. The
3839 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
3840 virtual unsigned getJumpTableEncoding() const;
3841
3842 virtual const MCExpr *
3843 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
3844 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
3845 MCContext &/*Ctx*/) const {
3846 llvm_unreachable("Need to implement this hook if target has custom JTIs");
3847 }
3848
3849 /// Returns relocation base for the given PIC jumptable.
3850 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
3851 SelectionDAG &DAG) const;
3852
3853 /// This returns the relocation base for the given PIC jumptable, the same as
3854 /// getPICJumpTableRelocBase, but as an MCExpr.
3855 virtual const MCExpr *
3856 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3857 unsigned JTI, MCContext &Ctx) const;
3858
3859 /// Return true if folding a constant offset with the given GlobalAddress is
3860 /// legal. It is frequently not legal in PIC relocation models.
3861 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
3862
3863 /// On x86, return true if the operand with index OpNo is a CALL or JUMP
3864 /// instruction, which can use either a memory constraint or an address
3865 /// constraint. -fasm-blocks "__asm call foo" lowers to
3866 /// call void asm sideeffect inteldialect "call ${0:P}", "*m..."
3867 ///
3868 /// This function is used by a hack to choose the address constraint,
3869 /// lowering to a direct call.
3870 virtual bool
3871 isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
3872 unsigned OpNo) const {
3873 return false;
3874 }
3875
3876 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
3877 SDValue &Chain) const;
3878
3879 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3880 SDValue &NewRHS, ISD::CondCode &CCCode,
3881 const SDLoc &DL, const SDValue OldLHS,
3882 const SDValue OldRHS) const;
3883
3884 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3885 SDValue &NewRHS, ISD::CondCode &CCCode,
3886 const SDLoc &DL, const SDValue OldLHS,
3887 const SDValue OldRHS, SDValue &Chain,
3888 bool IsSignaling = false) const;
3889
3890 /// Returns a pair of (return value, chain).
3891 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
3892 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
3893 EVT RetVT, ArrayRef<SDValue> Ops,
3894 MakeLibCallOptions CallOptions,
3895 const SDLoc &dl,
3896 SDValue Chain = SDValue()) const;
3897
3898 /// Check whether parameters to a call that are passed in callee saved
3899 /// registers are the same as from the calling function. This needs to be
3900 /// checked for tail call eligibility.
3901 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
3902 const uint32_t *CallerPreservedMask,
3903 const SmallVectorImpl<CCValAssign> &ArgLocs,
3904 const SmallVectorImpl<SDValue> &OutVals) const;
3905
3906 //===--------------------------------------------------------------------===//
3907 // TargetLowering Optimization Methods
3908 //
3909
3910 /// A convenience struct that encapsulates a DAG, and two SDValues for
3911 /// returning information from TargetLowering to its clients that want to
3912 /// combine.
3913 struct TargetLoweringOpt {
3914 SelectionDAG &DAG;
3915 bool LegalTys;
3916 bool LegalOps;
3917 SDValue Old;
3918 SDValue New;
3919
3920 explicit TargetLoweringOpt(SelectionDAG &InDAG,
3921 bool LT, bool LO) :
3922 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
3923
3924 bool LegalTypes() const { return LegalTys; }
3925 bool LegalOperations() const { return LegalOps; }
3926
3927 bool CombineTo(SDValue O, SDValue N) {
3928 Old = O;
3929 New = N;
3930 return true;
3931 }
3932 };
3933
3934 /// Determines the optimal series of memory ops to replace the memset / memcpy.
3935 /// Return true if the number of memory ops is below the threshold (Limit).
3936 /// Note that this is always the case when Limit is ~0.
3937 /// It returns the types of the sequence of memory ops to perform
3938 /// memset / memcpy by reference.
3939 virtual bool
3940 findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
3941 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
3942 const AttributeList &FuncAttributes) const;
3943
3944 /// Check to see if the specified operand of the specified instruction is a
3945 /// constant integer. If so, check to see if there are any bits set in the
3946 /// constant that are not demanded. If so, shrink the constant and return
3947 /// true.
3948 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3949 const APInt &DemandedElts,
3950 TargetLoweringOpt &TLO) const;
3951
3952 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
3953 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3954 TargetLoweringOpt &TLO) const;
3955
3956 // Target hook to do target-specific const optimization, which is called by
3957 // ShrinkDemandedConstant. This function should return true if the target
3958 // doesn't want ShrinkDemandedConstant to further optimize the constant.
3959 virtual bool targetShrinkDemandedConstant(SDValue Op,
3960 const APInt &DemandedBits,
3961 const APInt &DemandedElts,
3962 TargetLoweringOpt &TLO) const {
3963 return false;
3964 }
3965
3966 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
3967 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
3968 /// but it could be generalized for targets with other types of implicit
3969 /// widening casts.
3970 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
3971 const APInt &DemandedBits,
3972 TargetLoweringOpt &TLO) const;
3973
3974 /// Look at Op. At this point, we know that only the DemandedBits bits of the
3975 /// result of Op are ever used downstream. If we can use this information to
3976 /// simplify Op, create a new simplified DAG node and return true, returning
3977 /// the original and new nodes in Old and New. Otherwise, analyze the
3978 /// expression and return a mask of KnownOne and KnownZero bits for the
3979 /// expression (used to simplify the caller). The KnownZero/One bits may only
3980 /// be accurate for those bits in the Demanded masks.
3981 /// \p AssumeSingleUse When this parameter is true, this function will
3982 /// attempt to simplify \p Op even if there are multiple uses.
3983 /// Callers are responsible for correctly updating the DAG based on the
3984 /// results of this function, because simply replacing TLO.Old
3985 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
3986 /// has multiple uses.
3987 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3988 const APInt &DemandedElts, KnownBits &Known,
3989 TargetLoweringOpt &TLO, unsigned Depth = 0,
3990 bool AssumeSingleUse = false) const;
3991
3992 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
3993 /// Adds Op back to the worklist upon success.
3994 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3995 KnownBits &Known, TargetLoweringOpt &TLO,
3996 unsigned Depth = 0,
3997 bool AssumeSingleUse = false) const;
3998
3999 /// Helper wrapper around SimplifyDemandedBits.
4000 /// Adds Op back to the worklist upon success.
4001 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4002 DAGCombinerInfo &DCI) const;
4003
4004 /// Helper wrapper around SimplifyDemandedBits.
4005 /// Adds Op back to the worklist upon success.
4006 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
4007 const APInt &DemandedElts,
4008 DAGCombinerInfo &DCI) const;
4009
4010 /// More limited version of SimplifyDemandedBits that can be used to "look
4011 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4012 /// bitwise ops etc.
4013 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4014 const APInt &DemandedElts,
4015 SelectionDAG &DAG,
4016 unsigned Depth = 0) const;
4017
4018 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4019 /// elements.
4020 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
4021 SelectionDAG &DAG,
4022 unsigned Depth = 0) const;
4023
4024 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
4025 /// bits from only some vector elements.
4026 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
4027 const APInt &DemandedElts,
4028 SelectionDAG &DAG,
4029 unsigned Depth = 0) const;
4030
4031 /// Look at Vector Op. At this point, we know that only the DemandedElts
4032 /// elements of the result of Op are ever used downstream. If we can use
4033 /// this information to simplify Op, create a new simplified DAG node and
4034 /// return true, storing the original and new nodes in TLO.
4035 /// Otherwise, analyze the expression and return a mask of KnownUndef and
4036 /// KnownZero elements for the expression (used to simplify the caller).
4037 /// The KnownUndef/Zero elements may only be accurate for those bits
4038 /// in the DemandedMask.
4039 /// \p AssumeSingleUse When this parameter is true, this function will
4040 /// attempt to simplify \p Op even if there are multiple uses.
4041 /// Callers are responsible for correctly updating the DAG based on the
4042 /// results of this function, because simply replacing TLO.Old
4043 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
4044 /// has multiple uses.
4045 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
4046 APInt &KnownUndef, APInt &KnownZero,
4047 TargetLoweringOpt &TLO, unsigned Depth = 0,
4048 bool AssumeSingleUse = false) const;
4049
4050 /// Helper wrapper around SimplifyDemandedVectorElts.
4051 /// Adds Op back to the worklist upon success.
4052 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
4053 DAGCombinerInfo &DCI) const;
4054
4055 /// Return true if the target supports simplifying demanded vector elements by
4056 /// converting them to undefs.
4057 virtual bool
4058 shouldSimplifyDemandedVectorElts(SDValue Op,
4059 const TargetLoweringOpt &TLO) const {
4060 return true;
4061 }
4062
4063 /// Determine which of the bits specified in Mask are known to be either zero
4064 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4065 /// argument allows us to only collect the known bits that are shared by the
4066 /// requested vector elements.
4067 virtual void computeKnownBitsForTargetNode(const SDValue Op,
4068 KnownBits &Known,
4069 const APInt &DemandedElts,
4070 const SelectionDAG &DAG,
4071 unsigned Depth = 0) const;
4072
4073 /// Determine which of the bits specified in Mask are known to be either zero
4074 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
4075 /// argument allows us to only collect the known bits that are shared by the
4076 /// requested vector elements. This is for GISel.
4077 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
4078 Register R, KnownBits &Known,
4079 const APInt &DemandedElts,
4080 const MachineRegisterInfo &MRI,
4081 unsigned Depth = 0) const;
4082
4083 /// Determine the known alignment for the pointer value \p R. This is can
4084 /// typically be inferred from the number of low known 0 bits. However, for a
4085 /// pointer with a non-integral address space, the alignment value may be
4086 /// independent from the known low bits.
4087 virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
4088 Register R,
4089 const MachineRegisterInfo &MRI,
4090 unsigned Depth = 0) const;
4091
4092 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
4093 /// Default implementation computes low bits based on alignment
4094 /// information. This should preserve known bits passed into it.
4095 virtual void computeKnownBitsForFrameIndex(int FIOp,
4096 KnownBits &Known,
4097 const MachineFunction &MF) const;
4098
4099 /// This method can be implemented by targets that want to expose additional
4100 /// information about sign bits to the DAG Combiner. The DemandedElts
4101 /// argument allows us to only collect the minimum sign bits that are shared
4102 /// by the requested vector elements.
4103 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
4104 const APInt &DemandedElts,
4105 const SelectionDAG &DAG,
4106 unsigned Depth = 0) const;
4107
4108 /// This method can be implemented by targets that want to expose additional
4109 /// information about sign bits to GlobalISel combiners. The DemandedElts
4110 /// argument allows us to only collect the minimum sign bits that are shared
4111 /// by the requested vector elements.
4112 virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
4113 Register R,
4114 const APInt &DemandedElts,
4115 const MachineRegisterInfo &MRI,
4116 unsigned Depth = 0) const;
4117
4118 /// Attempt to simplify any target nodes based on the demanded vector
4119 /// elements, returning true on success. Otherwise, analyze the expression and
4120 /// return a mask of KnownUndef and KnownZero elements for the expression
4121 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
4122 /// accurate for those bits in the DemandedMask.
4123 virtual bool SimplifyDemandedVectorEltsForTargetNode(
4124 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
4125 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
4126
4127 /// Attempt to simplify any target nodes based on the demanded bits/elts,
4128 /// returning true on success. Otherwise, analyze the
4129 /// expression and return a mask of KnownOne and KnownZero bits for the
4130 /// expression (used to simplify the caller). The KnownZero/One bits may only
4131 /// be accurate for those bits in the Demanded masks.
4132 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
4133 const APInt &DemandedBits,
4134 const APInt &DemandedElts,
4135 KnownBits &Known,
4136 TargetLoweringOpt &TLO,
4137 unsigned Depth = 0) const;
4138
4139 /// More limited version of SimplifyDemandedBits that can be used to "look
4140 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
4141 /// bitwise ops etc.
4142 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
4143 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4144 SelectionDAG &DAG, unsigned Depth) const;
4145
4146 /// Return true if this function can prove that \p Op is never poison
4147 /// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
4148 /// argument limits the check to the requested vector elements.
4149 virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4150 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4151 bool PoisonOnly, unsigned Depth) const;
4152
4153 /// Return true if Op can create undef or poison from non-undef & non-poison
4154 /// operands. The DemandedElts argument limits the check to the requested
4155 /// vector elements.
4156 virtual bool
4157 canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
4158 const SelectionDAG &DAG, bool PoisonOnly,
4159 bool ConsiderFlags, unsigned Depth) const;
4160
4161 /// Tries to build a legal vector shuffle using the provided parameters
4162 /// or equivalent variations. The Mask argument maybe be modified as the
4163 /// function tries different variations.
4164 /// Returns an empty SDValue if the operation fails.
4165 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
4166 SDValue N1, MutableArrayRef<int> Mask,
4167 SelectionDAG &DAG) const;
4168
4169 /// This method returns the constant pool value that will be loaded by LD.
4170 /// NOTE: You must check for implicit extensions of the constant by LD.
4171 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
4172
4173 /// If \p SNaN is false, \returns true if \p Op is known to never be any
4174 /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
4175 /// NaN.
4176 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
4177 const SelectionDAG &DAG,
4178 bool SNaN = false,
4179 unsigned Depth = 0) const;
4180
4181 /// Return true if vector \p Op has the same value across all \p DemandedElts,
4182 /// indicating any elements which may be undef in the output \p UndefElts.
4183 virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
4184 APInt &UndefElts,
4185 const SelectionDAG &DAG,
4186 unsigned Depth = 0) const;
4187
4188 /// Returns true if the given Opc is considered a canonical constant for the
4189 /// target, which should not be transformed back into a BUILD_VECTOR.
4190 virtual bool isTargetCanonicalConstantNode(SDValue Op) const {
4191 return Op.getOpcode() == ISD::SPLAT_VECTOR ||
4192 Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS;
4193 }
4194
4195 struct DAGCombinerInfo {
4196 void *DC; // The DAG Combiner object.
4197 CombineLevel Level;
4198 bool CalledByLegalizer;
4199
4200 public:
4201 SelectionDAG &DAG;
4202
4203 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
4204 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
4205
4206 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
4207 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
4208 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
4209 CombineLevel getDAGCombineLevel() { return Level; }
4210 bool isCalledByLegalizer() const { return CalledByLegalizer; }
4211
4212 void AddToWorklist(SDNode *N);
4213 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
4214 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
4215 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
4216
4217 bool recursivelyDeleteUnusedNodes(SDNode *N);
4218
4219 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
4220 };
4221
4222 /// Return if the N is a constant or constant vector equal to the true value
4223 /// from getBooleanContents().
4224 bool isConstTrueVal(SDValue N) const;
4225
4226 /// Return if the N is a constant or constant vector equal to the false value
4227 /// from getBooleanContents().
4228 bool isConstFalseVal(SDValue N) const;
4229
4230 /// Return if \p N is a True value when extended to \p VT.
4231 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
4232
4233 /// Try to simplify a setcc built with the specified operands and cc. If it is
4234 /// unable to simplify it, return a null SDValue.
4235 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4236 bool foldBooleans, DAGCombinerInfo &DCI,
4237 const SDLoc &dl) const;
4238
4239 // For targets which wrap address, unwrap for analysis.
4240 virtual SDValue unwrapAddress(SDValue N) const { return N; }
4241
4242 /// Returns true (and the GlobalValue and the offset) if the node is a
4243 /// GlobalAddress + offset.
4244 virtual bool
4245 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
4246
4247 /// This method will be invoked for all target nodes and for any
4248 /// target-independent nodes that the target has registered with invoke it
4249 /// for.
4250 ///
4251 /// The semantics are as follows:
4252 /// Return Value:
4253 /// SDValue.Val == 0 - No change was made
4254 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
4255 /// otherwise - N should be replaced by the returned Operand.
4256 ///
4257 /// In addition, methods provided by DAGCombinerInfo may be used to perform
4258 /// more complex transformations.
4259 ///
4260 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
4261
4262 /// Return true if it is profitable to move this shift by a constant amount
4263 /// through its operand, adjusting any immediate operands as necessary to
4264 /// preserve semantics. This transformation may not be desirable if it
4265 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
4266 /// extraction in AArch64). By default, it returns true.
4267 ///
4268 /// @param N the shift node
4269 /// @param Level the current DAGCombine legalization level.
4270 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
4271 CombineLevel Level) const {
4272 return true;
4273 }
4274
4275 /// GlobalISel - return true if it is profitable to move this shift by a
4276 /// constant amount through its operand, adjusting any immediate operands as
4277 /// necessary to preserve semantics. This transformation may not be desirable
4278 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4279 /// bitfield extraction in AArch64). By default, it returns true.
4280 ///
4281 /// @param MI the shift instruction
4282 /// @param IsAfterLegal true if running after legalization.
4283 virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
4284 bool IsAfterLegal) const {
4285 return true;
4286 }
4287
4288 /// GlobalISel - return true if it's profitable to perform the combine:
4289 /// shl ([sza]ext x), y => zext (shl x, y)
4290 virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
4291 return true;
4292 }
4293
4294 // Return AndOrSETCCFoldKind::{AddAnd, ABS} if its desirable to try and
4295 // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
4296 // writing this) is:
4297 // With C as a power of 2 and C != 0 and C != INT_MIN:
4298 // AddAnd:
4299 // (icmp eq A, C) | (icmp eq A, -C)
4300 // -> (icmp eq and(add(A, C), ~(C + C)), 0)
4301 // (icmp ne A, C) & (icmp ne A, -C)w
4302 // -> (icmp ne and(add(A, C), ~(C + C)), 0)
4303 // ABS:
4304 // (icmp eq A, C) | (icmp eq A, -C)
4305 // -> (icmp eq Abs(A), C)
4306 // (icmp ne A, C) & (icmp ne A, -C)w
4307 // -> (icmp ne Abs(A), C)
4308 //
4309 // @param LogicOp the logic op
4310 // @param SETCC0 the first of the SETCC nodes
4311 // @param SETCC0 the second of the SETCC nodes
4312 virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
4313 const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
4314 return AndOrSETCCFoldKind::None;
4315 }
4316
4317 /// Return true if it is profitable to combine an XOR of a logical shift
4318 /// to create a logical shift of NOT. This transformation may not be desirable
4319 /// if it disrupts a particularly auspicious target-specific tree (e.g.
4320 /// BIC on ARM/AArch64). By default, it returns true.
4321 virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
4322 return true;
4323 }
4324
4325 /// Return true if the target has native support for the specified value type
4326 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
4327 /// i16 is legal, but undesirable since i16 instruction encodings are longer
4328 /// and some i16 instructions are slow.
4329 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
4330 // By default, assume all legal types are desirable.
4331 return isTypeLegal(VT);
4332 }
4333
4334 /// Return true if it is profitable for dag combiner to transform a floating
4335 /// point op of specified opcode to a equivalent op of an integer
4336 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
4337 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
4338 EVT /*VT*/) const {
4339 return false;
4340 }
4341
4342 /// This method query the target whether it is beneficial for dag combiner to
4343 /// promote the specified node. If true, it should return the desired
4344 /// promotion type by reference.
4345 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
4346 return false;
4347 }
4348
4349 /// Return true if the target supports swifterror attribute. It optimizes
4350 /// loads and stores to reading and writing a specific register.
4351 virtual bool supportSwiftError() const {
4352 return false;
4353 }
4354
4355 /// Return true if the target supports that a subset of CSRs for the given
4356 /// machine function is handled explicitly via copies.
4357 virtual bool supportSplitCSR(MachineFunction *MF) const {
4358 return false;
4359 }
4360
4361 /// Return true if the target supports kcfi operand bundles.
4362 virtual bool supportKCFIBundles() const { return false; }
4363
4364 /// Perform necessary initialization to handle a subset of CSRs explicitly
4365 /// via copies. This function is called at the beginning of instruction
4366 /// selection.
4367 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
4368 llvm_unreachable("Not Implemented");
4369 }
4370
4371 /// Insert explicit copies in entry and exit blocks. We copy a subset of
4372 /// CSRs to virtual registers in the entry block, and copy them back to
4373 /// physical registers in the exit blocks. This function is called at the end
4374 /// of instruction selection.
4375 virtual void insertCopiesSplitCSR(
4376 MachineBasicBlock *Entry,
4377 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
4378 llvm_unreachable("Not Implemented");
4379 }
4380
4381 /// Return the newly negated expression if the cost is not expensive and
4382 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
4383 /// do the negation.
4384 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
4385 bool LegalOps, bool OptForSize,
4386 NegatibleCost &Cost,
4387 unsigned Depth = 0) const;
4388
4389 SDValue getCheaperOrNeutralNegatedExpression(
4390 SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize,
4391 const NegatibleCost CostThreshold = NegatibleCost::Neutral,
4392 unsigned Depth = 0) const {
4393 NegatibleCost Cost = NegatibleCost::Expensive;
4394 SDValue Neg =
4395 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4396 if (!Neg)
4397 return SDValue();
4398
4399 if (Cost <= CostThreshold)
4400 return Neg;
4401
4402 // Remove the new created node to avoid the side effect to the DAG.
4403 if (Neg->use_empty())
4404 DAG.RemoveDeadNode(N: Neg.getNode());
4405 return SDValue();
4406 }
4407
4408 /// This is the helper function to return the newly negated expression only
4409 /// when the cost is cheaper.
4410 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
4411 bool LegalOps, bool OptForSize,
4412 unsigned Depth = 0) const {
4413 return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize,
4414 CostThreshold: NegatibleCost::Cheaper, Depth);
4415 }
4416
4417 /// This is the helper function to return the newly negated expression if
4418 /// the cost is not expensive.
4419 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
4420 bool OptForSize, unsigned Depth = 0) const {
4421 NegatibleCost Cost = NegatibleCost::Expensive;
4422 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
4423 }
4424
4425 //===--------------------------------------------------------------------===//
4426 // Lowering methods - These methods must be implemented by targets so that
4427 // the SelectionDAGBuilder code knows how to lower these.
4428 //
4429
4430 /// Target-specific splitting of values into parts that fit a register
4431 /// storing a legal type
4432 virtual bool splitValueIntoRegisterParts(
4433 SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4434 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4435 return false;
4436 }
4437
4438 /// Allows the target to handle physreg-carried dependency
4439 /// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
4440 /// to add the edge to the dependency graph.
4441 /// Def - input: Selection DAG node defininfg physical register
4442 /// User - input: Selection DAG node using physical register
4443 /// Op - input: Number of User operand
4444 /// PhysReg - inout: set to the physical register if the edge is
4445 /// necessary, unchanged otherwise
4446 /// Cost - inout: physical register copy cost.
4447 /// Returns 'true' is the edge is necessary, 'false' otherwise
4448 virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
4449 const TargetRegisterInfo *TRI,
4450 const TargetInstrInfo *TII,
4451 unsigned &PhysReg, int &Cost) const {
4452 return false;
4453 }
4454
4455 /// Target-specific combining of register parts into its original value
4456 virtual SDValue
4457 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
4458 const SDValue *Parts, unsigned NumParts,
4459 MVT PartVT, EVT ValueVT,
4460 std::optional<CallingConv::ID> CC) const {
4461 return SDValue();
4462 }
4463
4464 /// This hook must be implemented to lower the incoming (formal) arguments,
4465 /// described by the Ins array, into the specified DAG. The implementation
4466 /// should fill in the InVals array with legal-type argument values, and
4467 /// return the resulting token chain value.
4468 virtual SDValue LowerFormalArguments(
4469 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
4470 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
4471 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
4472 llvm_unreachable("Not Implemented");
4473 }
4474
4475 /// This structure contains all information that is necessary for lowering
4476 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
4477 /// needs to lower a call, and targets will see this struct in their LowerCall
4478 /// implementation.
4479 struct CallLoweringInfo {
4480 SDValue Chain;
4481 Type *RetTy = nullptr;
4482 bool RetSExt : 1;
4483 bool RetZExt : 1;
4484 bool IsVarArg : 1;
4485 bool IsInReg : 1;
4486 bool DoesNotReturn : 1;
4487 bool IsReturnValueUsed : 1;
4488 bool IsConvergent : 1;
4489 bool IsPatchPoint : 1;
4490 bool IsPreallocated : 1;
4491 bool NoMerge : 1;
4492
4493 // IsTailCall should be modified by implementations of
4494 // TargetLowering::LowerCall that perform tail call conversions.
4495 bool IsTailCall = false;
4496
4497 // Is Call lowering done post SelectionDAG type legalization.
4498 bool IsPostTypeLegalization = false;
4499
4500 unsigned NumFixedArgs = -1;
4501 CallingConv::ID CallConv = CallingConv::C;
4502 SDValue Callee;
4503 ArgListTy Args;
4504 SelectionDAG &DAG;
4505 SDLoc DL;
4506 const CallBase *CB = nullptr;
4507 SmallVector<ISD::OutputArg, 32> Outs;
4508 SmallVector<SDValue, 32> OutVals;
4509 SmallVector<ISD::InputArg, 32> Ins;
4510 SmallVector<SDValue, 4> InVals;
4511 const ConstantInt *CFIType = nullptr;
4512 SDValue ConvergenceControlToken;
4513
4514 CallLoweringInfo(SelectionDAG &DAG)
4515 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
4516 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
4517 IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
4518 DAG(DAG) {}
4519
4520 CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
4521 DL = dl;
4522 return *this;
4523 }
4524
4525 CallLoweringInfo &setChain(SDValue InChain) {
4526 Chain = InChain;
4527 return *this;
4528 }
4529
4530 // setCallee with target/module-specific attributes
4531 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
4532 SDValue Target, ArgListTy &&ArgsList) {
4533 RetTy = ResultType;
4534 Callee = Target;
4535 CallConv = CC;
4536 NumFixedArgs = ArgsList.size();
4537 Args = std::move(ArgsList);
4538
4539 DAG.getTargetLoweringInfo().markLibCallAttributes(
4540 MF: &(DAG.getMachineFunction()), CC, Args);
4541 return *this;
4542 }
4543
4544 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
4545 SDValue Target, ArgListTy &&ArgsList,
4546 AttributeSet ResultAttrs = {}) {
4547 RetTy = ResultType;
4548 IsInReg = ResultAttrs.hasAttribute(Attribute::InReg);
4549 RetSExt = ResultAttrs.hasAttribute(Attribute::SExt);
4550 RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt);
4551 NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge);
4552
4553 Callee = Target;
4554 CallConv = CC;
4555 NumFixedArgs = ArgsList.size();
4556 Args = std::move(ArgsList);
4557 return *this;
4558 }
4559
4560 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
4561 SDValue Target, ArgListTy &&ArgsList,
4562 const CallBase &Call) {
4563 RetTy = ResultType;
4564
4565 IsInReg = Call.hasRetAttr(Attribute::InReg);
4566 DoesNotReturn =
4567 Call.doesNotReturn() ||
4568 (!isa<InvokeInst>(Val: Call) && isa<UnreachableInst>(Val: Call.getNextNode()));
4569 IsVarArg = FTy->isVarArg();
4570 IsReturnValueUsed = !Call.use_empty();
4571 RetSExt = Call.hasRetAttr(Attribute::SExt);
4572 RetZExt = Call.hasRetAttr(Attribute::ZExt);
4573 NoMerge = Call.hasFnAttr(Attribute::NoMerge);
4574
4575 Callee = Target;
4576
4577 CallConv = Call.getCallingConv();
4578 NumFixedArgs = FTy->getNumParams();
4579 Args = std::move(ArgsList);
4580
4581 CB = &Call;
4582
4583 return *this;
4584 }
4585
4586 CallLoweringInfo &setInRegister(bool Value = true) {
4587 IsInReg = Value;
4588 return *this;
4589 }
4590
4591 CallLoweringInfo &setNoReturn(bool Value = true) {
4592 DoesNotReturn = Value;
4593 return *this;
4594 }
4595
4596 CallLoweringInfo &setVarArg(bool Value = true) {
4597 IsVarArg = Value;
4598 return *this;
4599 }
4600
4601 CallLoweringInfo &setTailCall(bool Value = true) {
4602 IsTailCall = Value;
4603 return *this;
4604 }
4605
4606 CallLoweringInfo &setDiscardResult(bool Value = true) {
4607 IsReturnValueUsed = !Value;
4608 return *this;
4609 }
4610
4611 CallLoweringInfo &setConvergent(bool Value = true) {
4612 IsConvergent = Value;
4613 return *this;
4614 }
4615
4616 CallLoweringInfo &setSExtResult(bool Value = true) {
4617 RetSExt = Value;
4618 return *this;
4619 }
4620
4621 CallLoweringInfo &setZExtResult(bool Value = true) {
4622 RetZExt = Value;
4623 return *this;
4624 }
4625
4626 CallLoweringInfo &setIsPatchPoint(bool Value = true) {
4627 IsPatchPoint = Value;
4628 return *this;
4629 }
4630
4631 CallLoweringInfo &setIsPreallocated(bool Value = true) {
4632 IsPreallocated = Value;
4633 return *this;
4634 }
4635
4636 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
4637 IsPostTypeLegalization = Value;
4638 return *this;
4639 }
4640
4641 CallLoweringInfo &setCFIType(const ConstantInt *Type) {
4642 CFIType = Type;
4643 return *this;
4644 }
4645
4646 CallLoweringInfo &setConvergenceControlToken(SDValue Token) {
4647 ConvergenceControlToken = Token;
4648 return *this;
4649 }
4650
4651 ArgListTy &getArgs() {
4652 return Args;
4653 }
4654 };
4655
4656 /// This structure is used to pass arguments to makeLibCall function.
4657 struct MakeLibCallOptions {
4658 // By passing type list before soften to makeLibCall, the target hook
4659 // shouldExtendTypeInLibCall can get the original type before soften.
4660 ArrayRef<EVT> OpsVTBeforeSoften;
4661 EVT RetVTBeforeSoften;
4662 bool IsSExt : 1;
4663 bool DoesNotReturn : 1;
4664 bool IsReturnValueUsed : 1;
4665 bool IsPostTypeLegalization : 1;
4666 bool IsSoften : 1;
4667
4668 MakeLibCallOptions()
4669 : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
4670 IsPostTypeLegalization(false), IsSoften(false) {}
4671
4672 MakeLibCallOptions &setSExt(bool Value = true) {
4673 IsSExt = Value;
4674 return *this;
4675 }
4676
4677 MakeLibCallOptions &setNoReturn(bool Value = true) {
4678 DoesNotReturn = Value;
4679 return *this;
4680 }
4681
4682 MakeLibCallOptions &setDiscardResult(bool Value = true) {
4683 IsReturnValueUsed = !Value;
4684 return *this;
4685 }
4686
4687 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
4688 IsPostTypeLegalization = Value;
4689 return *this;
4690 }
4691
4692 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
4693 bool Value = true) {
4694 OpsVTBeforeSoften = OpsVT;
4695 RetVTBeforeSoften = RetVT;
4696 IsSoften = Value;
4697 return *this;
4698 }
4699 };
4700
4701 /// This function lowers an abstract call to a function into an actual call.
4702 /// This returns a pair of operands. The first element is the return value
4703 /// for the function (if RetTy is not VoidTy). The second element is the
4704 /// outgoing token chain. It calls LowerCall to do the actual lowering.
4705 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
4706
4707 /// This hook must be implemented to lower calls into the specified
4708 /// DAG. The outgoing arguments to the call are described by the Outs array,
4709 /// and the values to be returned by the call are described by the Ins
4710 /// array. The implementation should fill in the InVals array with legal-type
4711 /// return values from the call, and return the resulting token chain value.
4712 virtual SDValue
4713 LowerCall(CallLoweringInfo &/*CLI*/,
4714 SmallVectorImpl<SDValue> &/*InVals*/) const {
4715 llvm_unreachable("Not Implemented");
4716 }
4717
4718 /// Target-specific cleanup for formal ByVal parameters.
4719 virtual void HandleByVal(CCState *, unsigned &, Align) const {}
4720
4721 /// This hook should be implemented to check whether the return values
4722 /// described by the Outs array can fit into the return registers. If false
4723 /// is returned, an sret-demotion is performed.
4724 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
4725 MachineFunction &/*MF*/, bool /*isVarArg*/,
4726 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
4727 LLVMContext &/*Context*/) const
4728 {
4729 // Return true by default to get preexisting behavior.
4730 return true;
4731 }
4732
4733 /// This hook must be implemented to lower outgoing return values, described
4734 /// by the Outs array, into the specified DAG. The implementation should
4735 /// return the resulting token chain value.
4736 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
4737 bool /*isVarArg*/,
4738 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
4739 const SmallVectorImpl<SDValue> & /*OutVals*/,
4740 const SDLoc & /*dl*/,
4741 SelectionDAG & /*DAG*/) const {
4742 llvm_unreachable("Not Implemented");
4743 }
4744
4745 /// Return true if result of the specified node is used by a return node
4746 /// only. It also compute and return the input chain for the tail call.
4747 ///
4748 /// This is used to determine whether it is possible to codegen a libcall as
4749 /// tail call at legalization time.
4750 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
4751 return false;
4752 }
4753
4754 /// Return true if the target may be able emit the call instruction as a tail
4755 /// call. This is used by optimization passes to determine if it's profitable
4756 /// to duplicate return instructions to enable tailcall optimization.
4757 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
4758 return false;
4759 }
4760
4761 /// Return the builtin name for the __builtin___clear_cache intrinsic
4762 /// Default is to invoke the clear cache library call
4763 virtual const char * getClearCacheBuiltinName() const {
4764 return "__clear_cache";
4765 }
4766
4767 /// Return the register ID of the name passed in. Used by named register
4768 /// global variables extension. There is no target-independent behaviour
4769 /// so the default action is to bail.
4770 virtual Register getRegisterByName(const char* RegName, LLT Ty,
4771 const MachineFunction &MF) const {
4772 report_fatal_error(reason: "Named registers not implemented for this target");
4773 }
4774
4775 /// Return the type that should be used to zero or sign extend a
4776 /// zeroext/signext integer return value. FIXME: Some C calling conventions
4777 /// require the return type to be promoted, but this is not true all the time,
4778 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
4779 /// conventions. The frontend should handle this and include all of the
4780 /// necessary information.
4781 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
4782 ISD::NodeType /*ExtendKind*/) const {
4783 EVT MinVT = getRegisterType(MVT::i32);
4784 return VT.bitsLT(VT: MinVT) ? MinVT : VT;
4785 }
4786
4787 /// For some targets, an LLVM struct type must be broken down into multiple
4788 /// simple types, but the calling convention specifies that the entire struct
4789 /// must be passed in a block of consecutive registers.
4790 virtual bool
4791 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
4792 bool isVarArg,
4793 const DataLayout &DL) const {
4794 return false;
4795 }
4796
4797 /// For most targets, an LLVM type must be broken down into multiple
4798 /// smaller types. Usually the halves are ordered according to the endianness
4799 /// but for some platform that would break. So this method will default to
4800 /// matching the endianness but can be overridden.
4801 virtual bool
4802 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
4803 return DL.isLittleEndian();
4804 }
4805
4806 /// Returns a 0 terminated array of registers that can be safely used as
4807 /// scratch registers.
4808 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
4809 return nullptr;
4810 }
4811
4812 /// Returns a 0 terminated array of rounding control registers that can be
4813 /// attached into strict FP call.
4814 virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const {
4815 return ArrayRef<MCPhysReg>();
4816 }
4817
4818 /// This callback is used to prepare for a volatile or atomic load.
4819 /// It takes a chain node as input and returns the chain for the load itself.
4820 ///
4821 /// Having a callback like this is necessary for targets like SystemZ,
4822 /// which allows a CPU to reuse the result of a previous load indefinitely,
4823 /// even if a cache-coherent store is performed by another CPU. The default
4824 /// implementation does nothing.
4825 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
4826 SelectionDAG &DAG) const {
4827 return Chain;
4828 }
4829
4830 /// This callback is invoked by the type legalizer to legalize nodes with an
4831 /// illegal operand type but legal result types. It replaces the
4832 /// LowerOperation callback in the type Legalizer. The reason we can not do
4833 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
4834 /// use this callback.
4835 ///
4836 /// TODO: Consider merging with ReplaceNodeResults.
4837 ///
4838 /// The target places new result values for the node in Results (their number
4839 /// and types must exactly match those of the original return values of
4840 /// the node), or leaves Results empty, which indicates that the node is not
4841 /// to be custom lowered after all.
4842 /// The default implementation calls LowerOperation.
4843 virtual void LowerOperationWrapper(SDNode *N,
4844 SmallVectorImpl<SDValue> &Results,
4845 SelectionDAG &DAG) const;
4846
4847 /// This callback is invoked for operations that are unsupported by the
4848 /// target, which are registered to use 'custom' lowering, and whose defined
4849 /// values are all legal. If the target has no operations that require custom
4850 /// lowering, it need not implement this. The default implementation of this
4851 /// aborts.
4852 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
4853
4854 /// This callback is invoked when a node result type is illegal for the
4855 /// target, and the operation was registered to use 'custom' lowering for that
4856 /// result type. The target places new result values for the node in Results
4857 /// (their number and types must exactly match those of the original return
4858 /// values of the node), or leaves Results empty, which indicates that the
4859 /// node is not to be custom lowered after all.
4860 ///
4861 /// If the target has no operations that require custom lowering, it need not
4862 /// implement this. The default implementation aborts.
4863 virtual void ReplaceNodeResults(SDNode * /*N*/,
4864 SmallVectorImpl<SDValue> &/*Results*/,
4865 SelectionDAG &/*DAG*/) const {
4866 llvm_unreachable("ReplaceNodeResults not implemented for this target!");
4867 }
4868
4869 /// This method returns the name of a target specific DAG node.
4870 virtual const char *getTargetNodeName(unsigned Opcode) const;
4871
4872 /// This method returns a target specific FastISel object, or null if the
4873 /// target does not support "fast" ISel.
4874 virtual FastISel *createFastISel(FunctionLoweringInfo &,
4875 const TargetLibraryInfo *) const {
4876 return nullptr;
4877 }
4878
4879 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
4880 SelectionDAG &DAG) const;
4881
4882#ifndef NDEBUG
4883 /// Check the given SDNode. Aborts if it is invalid.
4884 virtual void verifyTargetSDNode(const SDNode *N) const {};
4885#endif
4886
4887 //===--------------------------------------------------------------------===//
4888 // Inline Asm Support hooks
4889 //
4890
4891 /// This hook allows the target to expand an inline asm call to be explicit
4892 /// llvm code if it wants to. This is useful for turning simple inline asms
4893 /// into LLVM intrinsics, which gives the compiler more information about the
4894 /// behavior of the code.
4895 virtual bool ExpandInlineAsm(CallInst *) const {
4896 return false;
4897 }
4898
4899 enum ConstraintType {
4900 C_Register, // Constraint represents specific register(s).
4901 C_RegisterClass, // Constraint represents any of register(s) in class.
4902 C_Memory, // Memory constraint.
4903 C_Address, // Address constraint.
4904 C_Immediate, // Requires an immediate.
4905 C_Other, // Something else.
4906 C_Unknown // Unsupported constraint.
4907 };
4908
4909 enum ConstraintWeight {
4910 // Generic weights.
4911 CW_Invalid = -1, // No match.
4912 CW_Okay = 0, // Acceptable.
4913 CW_Good = 1, // Good weight.
4914 CW_Better = 2, // Better weight.
4915 CW_Best = 3, // Best weight.
4916
4917 // Well-known weights.
4918 CW_SpecificReg = CW_Okay, // Specific register operands.
4919 CW_Register = CW_Good, // Register operands.
4920 CW_Memory = CW_Better, // Memory operands.
4921 CW_Constant = CW_Best, // Constant operand.
4922 CW_Default = CW_Okay // Default or don't know type.
4923 };
4924
4925 /// This contains information for each constraint that we are lowering.
4926 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
4927 /// This contains the actual string for the code, like "m". TargetLowering
4928 /// picks the 'best' code from ConstraintInfo::Codes that most closely
4929 /// matches the operand.
4930 std::string ConstraintCode;
4931
4932 /// Information about the constraint code, e.g. Register, RegisterClass,
4933 /// Memory, Other, Unknown.
4934 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
4935
4936 /// If this is the result output operand or a clobber, this is null,
4937 /// otherwise it is the incoming operand to the CallInst. This gets
4938 /// modified as the asm is processed.
4939 Value *CallOperandVal = nullptr;
4940
4941 /// The ValueType for the operand value.
4942 MVT ConstraintVT = MVT::Other;
4943
4944 /// Copy constructor for copying from a ConstraintInfo.
4945 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
4946 : InlineAsm::ConstraintInfo(std::move(Info)) {}
4947
4948 /// Return true of this is an input operand that is a matching constraint
4949 /// like "4".
4950 bool isMatchingInputConstraint() const;
4951
4952 /// If this is an input matching constraint, this method returns the output
4953 /// operand it matches.
4954 unsigned getMatchedOperand() const;
4955 };
4956
4957 using AsmOperandInfoVector = std::vector<AsmOperandInfo>;
4958
4959 /// Split up the constraint string from the inline assembly value into the
4960 /// specific constraints and their prefixes, and also tie in the associated
4961 /// operand values. If this returns an empty vector, and if the constraint
4962 /// string itself isn't empty, there was an error parsing.
4963 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
4964 const TargetRegisterInfo *TRI,
4965 const CallBase &Call) const;
4966
4967 /// Examine constraint type and operand type and determine a weight value.
4968 /// The operand object must already have been set up with the operand type.
4969 virtual ConstraintWeight getMultipleConstraintMatchWeight(
4970 AsmOperandInfo &info, int maIndex) const;
4971
4972 /// Examine constraint string and operand type and determine a weight value.
4973 /// The operand object must already have been set up with the operand type.
4974 virtual ConstraintWeight getSingleConstraintMatchWeight(
4975 AsmOperandInfo &info, const char *constraint) const;
4976
4977 /// Determines the constraint code and constraint type to use for the specific
4978 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4979 /// If the actual operand being passed in is available, it can be passed in as
4980 /// Op, otherwise an empty SDValue can be passed.
4981 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4982 SDValue Op,
4983 SelectionDAG *DAG = nullptr) const;
4984
4985 /// Given a constraint, return the type of constraint it is for this target.
4986 virtual ConstraintType getConstraintType(StringRef Constraint) const;
4987
4988 using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>;
4989 using ConstraintGroup = SmallVector<ConstraintPair>;
4990 /// Given an OpInfo with list of constraints codes as strings, return a
4991 /// sorted Vector of pairs of constraint codes and their types in priority of
4992 /// what we'd prefer to lower them as. This may contain immediates that
4993 /// cannot be lowered, but it is meant to be a machine agnostic order of
4994 /// preferences.
4995 ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const;
4996
4997 /// Given a physical register constraint (e.g. {edx}), return the register
4998 /// number and the register class for the register.
4999 ///
5000 /// Given a register class constraint, like 'r', if this corresponds directly
5001 /// to an LLVM register class, return a register of 0 and the register class
5002 /// pointer.
5003 ///
5004 /// This should only be used for C_Register constraints. On error, this
5005 /// returns a register number of 0 and a null register class pointer.
5006 virtual std::pair<unsigned, const TargetRegisterClass *>
5007 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5008 StringRef Constraint, MVT VT) const;
5009
5010 virtual InlineAsm::ConstraintCode
5011 getInlineAsmMemConstraint(StringRef ConstraintCode) const {
5012 if (ConstraintCode == "m")
5013 return InlineAsm::ConstraintCode::m;
5014 if (ConstraintCode == "o")
5015 return InlineAsm::ConstraintCode::o;
5016 if (ConstraintCode == "X")
5017 return InlineAsm::ConstraintCode::X;
5018 if (ConstraintCode == "p")
5019 return InlineAsm::ConstraintCode::p;
5020 return InlineAsm::ConstraintCode::Unknown;
5021 }
5022
5023 /// Try to replace an X constraint, which matches anything, with another that
5024 /// has more specific requirements based on the type of the corresponding
5025 /// operand. This returns null if there is no replacement to make.
5026 virtual const char *LowerXConstraint(EVT ConstraintVT) const;
5027
5028 /// Lower the specified operand into the Ops vector. If it is invalid, don't
5029 /// add anything to Ops.
5030 virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
5031 std::vector<SDValue> &Ops,
5032 SelectionDAG &DAG) const;
5033
5034 // Lower custom output constraints. If invalid, return SDValue().
5035 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
5036 const SDLoc &DL,
5037 const AsmOperandInfo &OpInfo,
5038 SelectionDAG &DAG) const;
5039
5040 // Targets may override this function to collect operands from the CallInst
5041 // and for example, lower them into the SelectionDAG operands.
5042 virtual void CollectTargetIntrinsicOperands(const CallInst &I,
5043 SmallVectorImpl<SDValue> &Ops,
5044 SelectionDAG &DAG) const;
5045
5046 //===--------------------------------------------------------------------===//
5047 // Div utility functions
5048 //
5049
5050 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5051 SmallVectorImpl<SDNode *> &Created) const;
5052 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
5053 SmallVectorImpl<SDNode *> &Created) const;
5054 // Build sdiv by power-of-2 with conditional move instructions
5055 SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
5056 SelectionDAG &DAG,
5057 SmallVectorImpl<SDNode *> &Created) const;
5058
5059 /// Targets may override this function to provide custom SDIV lowering for
5060 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5061 /// assumes SDIV is expensive and replaces it with a series of other integer
5062 /// operations.
5063 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5064 SelectionDAG &DAG,
5065 SmallVectorImpl<SDNode *> &Created) const;
5066
5067 /// Targets may override this function to provide custom SREM lowering for
5068 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
5069 /// assumes SREM is expensive and replaces it with a series of other integer
5070 /// operations.
5071 virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
5072 SelectionDAG &DAG,
5073 SmallVectorImpl<SDNode *> &Created) const;
5074
5075 /// Indicate whether this target prefers to combine FDIVs with the same
5076 /// divisor. If the transform should never be done, return zero. If the
5077 /// transform should be done, return the minimum number of divisor uses
5078 /// that must exist.
5079 virtual unsigned combineRepeatedFPDivisors() const {
5080 return 0;
5081 }
5082
5083 /// Hooks for building estimates in place of slower divisions and square
5084 /// roots.
5085
5086 /// Return either a square root or its reciprocal estimate value for the input
5087 /// operand.
5088 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5089 /// 'Enabled' as set by a potential default override attribute.
5090 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5091 /// refinement iterations required to generate a sufficient (though not
5092 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5093 /// The boolean UseOneConstNR output is used to select a Newton-Raphson
5094 /// algorithm implementation that uses either one or two constants.
5095 /// The boolean Reciprocal is used to select whether the estimate is for the
5096 /// square root of the input operand or the reciprocal of its square root.
5097 /// A target may choose to implement its own refinement within this function.
5098 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5099 /// any further refinement of the estimate.
5100 /// An empty SDValue return means no estimate sequence can be created.
5101 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
5102 int Enabled, int &RefinementSteps,
5103 bool &UseOneConstNR, bool Reciprocal) const {
5104 return SDValue();
5105 }
5106
5107 /// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
5108 /// required for correctness since InstCombine might have canonicalized a
5109 /// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
5110 /// through to the default expansion/soften to libcall, we might introduce a
5111 /// link-time dependency on libm into a file that originally did not have one.
5112 SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
5113
5114 /// Return a reciprocal estimate value for the input operand.
5115 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
5116 /// 'Enabled' as set by a potential default override attribute.
5117 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
5118 /// refinement iterations required to generate a sufficient (though not
5119 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
5120 /// A target may choose to implement its own refinement within this function.
5121 /// If that's true, then return '0' as the number of RefinementSteps to avoid
5122 /// any further refinement of the estimate.
5123 /// An empty SDValue return means no estimate sequence can be created.
5124 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
5125 int Enabled, int &RefinementSteps) const {
5126 return SDValue();
5127 }
5128
5129 /// Return a target-dependent comparison result if the input operand is
5130 /// suitable for use with a square root estimate calculation. For example, the
5131 /// comparison may check if the operand is NAN, INF, zero, normal, etc. The
5132 /// result should be used as the condition operand for a select or branch.
5133 virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
5134 const DenormalMode &Mode) const;
5135
5136 /// Return a target-dependent result if the input operand is not suitable for
5137 /// use with a square root estimate calculation.
5138 virtual SDValue getSqrtResultForDenormInput(SDValue Operand,
5139 SelectionDAG &DAG) const {
5140 return DAG.getConstantFP(Val: 0.0, DL: SDLoc(Operand), VT: Operand.getValueType());
5141 }
5142
5143 //===--------------------------------------------------------------------===//
5144 // Legalization utility functions
5145 //
5146
5147 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
5148 /// respectively, each computing an n/2-bit part of the result.
5149 /// \param Result A vector that will be filled with the parts of the result
5150 /// in little-endian order.
5151 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5152 /// if you want to control how low bits are extracted from the LHS.
5153 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
5154 /// \param RL Low bits of the RHS of the MUL. See LL for meaning
5155 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
5156 /// \returns true if the node has been expanded, false if it has not
5157 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
5158 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
5159 SelectionDAG &DAG, MulExpansionKind Kind,
5160 SDValue LL = SDValue(), SDValue LH = SDValue(),
5161 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5162
5163 /// Expand a MUL into two nodes. One that computes the high bits of
5164 /// the result and one that computes the low bits.
5165 /// \param HiLoVT The value type to use for the Lo and Hi nodes.
5166 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
5167 /// if you want to control how low bits are extracted from the LHS.
5168 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
5169 /// \param RL Low bits of the RHS of the MUL. See LL for meaning
5170 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
5171 /// \returns true if the node has been expanded. false if it has not
5172 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5173 SelectionDAG &DAG, MulExpansionKind Kind,
5174 SDValue LL = SDValue(), SDValue LH = SDValue(),
5175 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5176
5177 /// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit
5178 /// urem by constant and other arithmetic ops. The n/2-bit urem by constant
5179 /// will be expanded by DAGCombiner. This is not possible for all constant
5180 /// divisors.
5181 /// \param N Node to expand
5182 /// \param Result A vector that will be filled with the lo and high parts of
5183 /// the results. For *DIVREM, this will be the quotient parts followed
5184 /// by the remainder parts.
5185 /// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
5186 /// half of VT.
5187 /// \param LL Low bits of the LHS of the operation. You can use this
5188 /// parameter if you want to control how low bits are extracted from
5189 /// the LHS.
5190 /// \param LH High bits of the LHS of the operation. See LL for meaning.
5191 /// \returns true if the node has been expanded, false if it has not.
5192 bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
5193 EVT HiLoVT, SelectionDAG &DAG,
5194 SDValue LL = SDValue(),
5195 SDValue LH = SDValue()) const;
5196
5197 /// Expand funnel shift.
5198 /// \param N Node to expand
5199 /// \returns The expansion if successful, SDValue() otherwise
5200 SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;
5201
5202 /// Expand rotations.
5203 /// \param N Node to expand
5204 /// \param AllowVectorOps expand vector rotate, this should only be performed
5205 /// if the legalization is happening outside of LegalizeVectorOps
5206 /// \returns The expansion if successful, SDValue() otherwise
5207 SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const;
5208
5209 /// Expand shift-by-parts.
5210 /// \param N Node to expand
5211 /// \param Lo lower-output-part after conversion
5212 /// \param Hi upper-output-part after conversion
5213 void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi,
5214 SelectionDAG &DAG) const;
5215
5216 /// Expand float(f32) to SINT(i64) conversion
5217 /// \param N Node to expand
5218 /// \param Result output after conversion
5219 /// \returns True, if the expansion was successful, false otherwise
5220 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
5221
5222 /// Expand float to UINT conversion
5223 /// \param N Node to expand
5224 /// \param Result output after conversion
5225 /// \param Chain output chain after conversion
5226 /// \returns True, if the expansion was successful, false otherwise
5227 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
5228 SelectionDAG &DAG) const;
5229
5230 /// Expand UINT(i64) to double(f64) conversion
5231 /// \param N Node to expand
5232 /// \param Result output after conversion
5233 /// \param Chain output chain after conversion
5234 /// \returns True, if the expansion was successful, false otherwise
5235 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
5236 SelectionDAG &DAG) const;
5237
5238 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
5239 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
5240
5241 /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
5242 /// \param N Node to expand
5243 /// \returns The expansion result
5244 SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;
5245
5246 /// Truncate Op to ResultVT. If the result is exact, leave it alone. If it is
5247 /// not exact, force the result to be odd.
5248 /// \param ResultVT The type of result.
5249 /// \param Op The value to round.
5250 /// \returns The expansion result
5251 SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL,
5252 SelectionDAG &DAG) const;
5253
5254 /// Expand round(fp) to fp conversion
5255 /// \param N Node to expand
5256 /// \returns The expansion result
5257 SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const;
5258
5259 /// Expand check for floating point class.
5260 /// \param ResultVT The type of intrinsic call result.
5261 /// \param Op The tested value.
5262 /// \param Test The test to perform.
5263 /// \param Flags The optimization flags.
5264 /// \returns The expansion result or SDValue() if it fails.
5265 SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
5266 SDNodeFlags Flags, const SDLoc &DL,
5267 SelectionDAG &DAG) const;
5268
5269 /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
5270 /// vector nodes can only succeed if all operations are legal/custom.
5271 /// \param N Node to expand
5272 /// \returns The expansion result or SDValue() if it fails.
5273 SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;
5274
5275 /// Expand VP_CTPOP nodes.
5276 /// \returns The expansion result or SDValue() if it fails.
5277 SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const;
5278
5279 /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes,
5280 /// vector nodes can only succeed if all operations are legal/custom.
5281 /// \param N Node to expand
5282 /// \returns The expansion result or SDValue() if it fails.
5283 SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
5284
5285 /// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
5286 /// \param N Node to expand
5287 /// \returns The expansion result or SDValue() if it fails.
5288 SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;
5289
5290 /// Expand CTTZ via Table Lookup.
5291 /// \param N Node to expand
5292 /// \returns The expansion result or SDValue() if it fails.
5293 SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5294 SDValue Op, unsigned NumBitsPerElt) const;
5295
5296 /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
5297 /// vector nodes can only succeed if all operations are legal/custom.
5298 /// \param N Node to expand
5299 /// \returns The expansion result or SDValue() if it fails.
5300 SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
5301
5302 /// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
5303 /// \param N Node to expand
5304 /// \returns The expansion result or SDValue() if it fails.
5305 SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
5306
5307 /// Expand ABS nodes. Expands vector/scalar ABS nodes,
5308 /// vector nodes can only succeed if all operations are legal/custom.
5309 /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
5310 /// \param N Node to expand
5311 /// \param IsNegative indicate negated abs
5312 /// \returns The expansion result or SDValue() if it fails.
5313 SDValue expandABS(SDNode *N, SelectionDAG &DAG,
5314 bool IsNegative = false) const;
5315
5316 /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
5317 /// \param N Node to expand
5318 /// \returns The expansion result or SDValue() if it fails.
5319 SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;
5320
5321 /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
5322 /// scalar types. Returns SDValue() if expand fails.
5323 /// \param N Node to expand
5324 /// \returns The expansion result or SDValue() if it fails.
5325 SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;
5326
5327 /// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with
5328 /// i16/i32/i64 scalar types. Returns SDValue() if expand fails. \param N Node
5329 /// to expand \returns The expansion result or SDValue() if it fails.
5330 SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;
5331
5332 /// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
5333 /// Returns SDValue() if expand fails.
5334 /// \param N Node to expand
5335 /// \returns The expansion result or SDValue() if it fails.
5336 SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5337
5338 /// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
5339 /// i8/i16/i32/i64 scalar types. \param N Node to expand \returns The
5340 /// expansion result or SDValue() if it fails.
5341 SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;
5342
5343 /// Turn load of vector type into a load of the individual elements.
5344 /// \param LD load to expand
5345 /// \returns BUILD_VECTOR and TokenFactor nodes.
5346 std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
5347 SelectionDAG &DAG) const;
5348
5349 // Turn a store of a vector type into stores of the individual elements.
5350 /// \param ST Store with a vector value type
5351 /// \returns TokenFactor of the individual store chains.
5352 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5353
5354 /// Expands an unaligned load to 2 half-size loads for an integer, and
5355 /// possibly more for vectors.
5356 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
5357 SelectionDAG &DAG) const;
5358
5359 /// Expands an unaligned store to 2 half-size stores for integer values, and
5360 /// possibly more for vectors.
5361 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5362
5363 /// Increments memory address \p Addr according to the type of the value
5364 /// \p DataVT that should be stored. If the data is stored in compressed
5365 /// form, the memory address should be incremented according to the number of
5366 /// the stored elements. This number is equal to the number of '1's bits
5367 /// in the \p Mask.
5368 /// \p DataVT is a vector type. \p Mask is a vector value.
5369 /// \p DataVT and \p Mask have the same number of vector elements.
5370 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
5371 EVT DataVT, SelectionDAG &DAG,
5372 bool IsCompressedMemory) const;
5373
5374 /// Get a pointer to vector element \p Idx located in memory for a vector of
5375 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
5376 /// bounds the returned pointer is unspecified, but will be within the vector
5377 /// bounds.
5378 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5379 SDValue Index) const;
5380
5381 /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
5382 /// in memory for a vector of type \p VecVT starting at a base address of
5383 /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
5384 /// returned pointer is unspecified, but the value returned will be such that
5385 /// the entire subvector would be within the vector bounds.
5386 SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
5387 EVT SubVecVT, SDValue Index) const;
5388
5389 /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
5390 /// method accepts integers as its arguments.
5391 SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;
5392
5393 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
5394 /// method accepts integers as its arguments.
5395 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
5396
5397 /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
5398 /// method accepts integers as its arguments.
5399 SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
5400
5401 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
5402 /// method accepts integers as its arguments.
5403 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
5404
5405 /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
5406 /// method accepts integers as its arguments.
5407 /// Note: This method may fail if the division could not be performed
5408 /// within the type. Clients must retry with a wider type if this happens.
5409 SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
5410 SDValue LHS, SDValue RHS,
5411 unsigned Scale, SelectionDAG &DAG) const;
5412
5413 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
5414 /// always suceeds and populates the Result and Overflow arguments.
5415 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5416 SelectionDAG &DAG) const;
5417
5418 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
5419 /// always suceeds and populates the Result and Overflow arguments.
5420 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5421 SelectionDAG &DAG) const;
5422
5423 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
5424 /// expansion was successful and populates the Result and Overflow arguments.
5425 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
5426 SelectionDAG &DAG) const;
5427
5428 /// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
5429 /// brute force via a wide multiplication. The expansion works by
5430 /// attempting to do a multiplication on a wider type twice the size of the
5431 /// original operands. LL and LH represent the lower and upper halves of the
5432 /// first operand. RL and RH represent the lower and upper halves of the
5433 /// second operand. The upper and lower halves of the result are stored in Lo
5434 /// and Hi.
5435 void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5436 EVT WideVT, const SDValue LL, const SDValue LH,
5437 const SDValue RL, const SDValue RH, SDValue &Lo,
5438 SDValue &Hi) const;
5439
5440 /// Same as above, but creates the upper halves of each operand by
5441 /// sign/zero-extending the operands.
5442 void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5443 const SDValue LHS, const SDValue RHS, SDValue &Lo,
5444 SDValue &Hi) const;
5445
5446 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
5447 /// only the first Count elements of the vector are used.
5448 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
5449
5450 /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
5451 SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
5452
5453 /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
5454 /// Returns true if the expansion was successful.
5455 bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
5456
5457 /// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
5458 /// method accepts vectors as its arguments.
5459 SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
5460
5461 /// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
5462 /// on the current target. A VP_SETCC will additionally be given a Mask
5463 /// and/or EVL not equal to SDValue().
5464 ///
5465 /// If the SETCC has been legalized using AND / OR, then the legalized node
5466 /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
5467 /// will be set to false. This will also hold if the VP_SETCC has been
5468 /// legalized using VP_AND / VP_OR.
5469 ///
5470 /// If the SETCC / VP_SETCC has been legalized by using
5471 /// getSetCCSwappedOperands(), then the values of LHS and RHS will be
5472 /// swapped, CC will be set to the new condition, and NeedInvert will be set
5473 /// to false.
5474 ///
5475 /// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
5476 /// then LHS and RHS will be unchanged, CC will set to the inverted condcode,
5477 /// and NeedInvert will be set to true. The caller must invert the result of
5478 /// the SETCC with SelectionDAG::getLogicalNOT() or take equivalent action to
5479 /// swap the effect of a true/false result.
5480 ///
5481 /// \returns true if the SETCC / VP_SETCC has been legalized, false if it
5482 /// hasn't.
5483 bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
5484 SDValue &RHS, SDValue &CC, SDValue Mask,
5485 SDValue EVL, bool &NeedInvert, const SDLoc &dl,
5486 SDValue &Chain, bool IsSignaling = false) const;
5487
5488 //===--------------------------------------------------------------------===//
5489 // Instruction Emitting Hooks
5490 //
5491
5492 /// This method should be implemented by targets that mark instructions with
5493 /// the 'usesCustomInserter' flag. These instructions are special in various
5494 /// ways, which require special support to insert. The specified MachineInstr
5495 /// is created but not inserted into any basic blocks, and this method is
5496 /// called to expand it into a sequence of instructions, potentially also
5497 /// creating new basic blocks and control flow.
5498 /// As long as the returned basic block is different (i.e., we created a new
5499 /// one), the custom inserter is free to modify the rest of \p MBB.
5500 virtual MachineBasicBlock *
5501 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
5502
5503 /// This method should be implemented by targets that mark instructions with
5504 /// the 'hasPostISelHook' flag. These instructions must be adjusted after
5505 /// instruction selection by target hooks. e.g. To fill in optional defs for
5506 /// ARM 's' setting instructions.
5507 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
5508 SDNode *Node) const;
5509
5510 /// If this function returns true, SelectionDAGBuilder emits a
5511 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
5512 virtual bool useLoadStackGuardNode() const {
5513 return false;
5514 }
5515
5516 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
5517 const SDLoc &DL) const {
5518 llvm_unreachable("not implemented for this target");
5519 }
5520
5521 /// Lower TLS global address SDNode for target independent emulated TLS model.
5522 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
5523 SelectionDAG &DAG) const;
5524
5525 /// Expands target specific indirect branch for the case of JumpTable
5526 /// expansion.
5527 virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
5528 SDValue Addr, int JTI,
5529 SelectionDAG &DAG) const;
5530
5531 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
5532 // If we're comparing for equality to zero and isCtlzFast is true, expose the
5533 // fact that this can be implemented as a ctlz/srl pair, so that the dag
5534 // combiner can fold the new nodes.
5535 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
5536
5537 // Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`
5538 virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
5539 return true;
5540 }
5541
5542private:
5543 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5544 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5545 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5546 const SDLoc &DL, DAGCombinerInfo &DCI) const;
5547
5548 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
5549 SDValue N1, ISD::CondCode Cond,
5550 DAGCombinerInfo &DCI,
5551 const SDLoc &DL) const;
5552
5553 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5554 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
5555 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
5556 DAGCombinerInfo &DCI, const SDLoc &DL) const;
5557
5558 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
5559 SDValue CompTargetNode, ISD::CondCode Cond,
5560 DAGCombinerInfo &DCI, const SDLoc &DL,
5561 SmallVectorImpl<SDNode *> &Created) const;
5562 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5563 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5564 const SDLoc &DL) const;
5565
5566 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5567 SDValue CompTargetNode, ISD::CondCode Cond,
5568 DAGCombinerInfo &DCI, const SDLoc &DL,
5569 SmallVectorImpl<SDNode *> &Created) const;
5570 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
5571 ISD::CondCode Cond, DAGCombinerInfo &DCI,
5572 const SDLoc &DL) const;
5573};
5574
5575/// Given an LLVM IR type and return type attributes, compute the return value
5576/// EVTs and flags, and optionally also the offsets, if the return value is
5577/// being lowered to memory.
5578void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
5579 SmallVectorImpl<ISD::OutputArg> &Outs,
5580 const TargetLowering &TLI, const DataLayout &DL);
5581
5582} // end namespace llvm
5583
5584#endif // LLVM_CODEGEN_TARGETLOWERING_H
5585

source code of llvm/include/llvm/CodeGen/TargetLowering.h