CodeGenPrepare.cpp source code [llvm/lib/CodeGen/CodeGenPrepare.cpp]

1	//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass munges the code in the input function to better prepare it for
10	// SelectionDAG-based code generation. This works around limitations in its
11	// basic-block-at-a-time approach. It should eventually be removed.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "llvm/CodeGen/CodeGenPrepare.h"
16	#include "llvm/ADT/APInt.h"
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/DenseMap.h"
19	#include "llvm/ADT/MapVector.h"
20	#include "llvm/ADT/PointerIntPair.h"
21	#include "llvm/ADT/STLExtras.h"
22	#include "llvm/ADT/SmallPtrSet.h"
23	#include "llvm/ADT/SmallVector.h"
24	#include "llvm/ADT/Statistic.h"
25	#include "llvm/Analysis/BlockFrequencyInfo.h"
26	#include "llvm/Analysis/BranchProbabilityInfo.h"
27	#include "llvm/Analysis/FloatingPointPredicateUtils.h"
28	#include "llvm/Analysis/InstructionSimplify.h"
29	#include "llvm/Analysis/LoopInfo.h"
30	#include "llvm/Analysis/ProfileSummaryInfo.h"
31	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
32	#include "llvm/Analysis/TargetLibraryInfo.h"
33	#include "llvm/Analysis/TargetTransformInfo.h"
34	#include "llvm/Analysis/ValueTracking.h"
35	#include "llvm/Analysis/VectorUtils.h"
36	#include "llvm/CodeGen/Analysis.h"
37	#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
38	#include "llvm/CodeGen/ISDOpcodes.h"
39	#include "llvm/CodeGen/SelectionDAGNodes.h"
40	#include "llvm/CodeGen/TargetLowering.h"
41	#include "llvm/CodeGen/TargetPassConfig.h"
42	#include "llvm/CodeGen/TargetSubtargetInfo.h"
43	#include "llvm/CodeGen/ValueTypes.h"
44	#include "llvm/CodeGenTypes/MachineValueType.h"
45	#include "llvm/Config/llvm-config.h"
46	#include "llvm/IR/Argument.h"
47	#include "llvm/IR/Attributes.h"
48	#include "llvm/IR/BasicBlock.h"
49	#include "llvm/IR/Constant.h"
50	#include "llvm/IR/Constants.h"
51	#include "llvm/IR/DataLayout.h"
52	#include "llvm/IR/DebugInfo.h"
53	#include "llvm/IR/DerivedTypes.h"
54	#include "llvm/IR/Dominators.h"
55	#include "llvm/IR/Function.h"
56	#include "llvm/IR/GetElementPtrTypeIterator.h"
57	#include "llvm/IR/GlobalValue.h"
58	#include "llvm/IR/GlobalVariable.h"
59	#include "llvm/IR/IRBuilder.h"
60	#include "llvm/IR/InlineAsm.h"
61	#include "llvm/IR/InstrTypes.h"
62	#include "llvm/IR/Instruction.h"
63	#include "llvm/IR/Instructions.h"
64	#include "llvm/IR/IntrinsicInst.h"
65	#include "llvm/IR/Intrinsics.h"
66	#include "llvm/IR/IntrinsicsAArch64.h"
67	#include "llvm/IR/LLVMContext.h"
68	#include "llvm/IR/MDBuilder.h"
69	#include "llvm/IR/Module.h"
70	#include "llvm/IR/Operator.h"
71	#include "llvm/IR/PatternMatch.h"
72	#include "llvm/IR/ProfDataUtils.h"
73	#include "llvm/IR/Statepoint.h"
74	#include "llvm/IR/Type.h"
75	#include "llvm/IR/Use.h"
76	#include "llvm/IR/User.h"
77	#include "llvm/IR/Value.h"
78	#include "llvm/IR/ValueHandle.h"
79	#include "llvm/IR/ValueMap.h"
80	#include "llvm/InitializePasses.h"
81	#include "llvm/Pass.h"
82	#include "llvm/Support/BlockFrequency.h"
83	#include "llvm/Support/BranchProbability.h"
84	#include "llvm/Support/Casting.h"
85	#include "llvm/Support/CommandLine.h"
86	#include "llvm/Support/Compiler.h"
87	#include "llvm/Support/Debug.h"
88	#include "llvm/Support/ErrorHandling.h"
89	#include "llvm/Support/raw_ostream.h"
90	#include "llvm/Target/TargetMachine.h"
91	#include "llvm/Target/TargetOptions.h"
92	#include "llvm/Transforms/Utils/BasicBlockUtils.h"
93	#include "llvm/Transforms/Utils/BypassSlowDivision.h"
94	#include "llvm/Transforms/Utils/Local.h"
95	#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
96	#include "llvm/Transforms/Utils/SizeOpts.h"
97	#include <algorithm>
98	#include <cassert>
99	#include <cstdint>
100	#include <iterator>
101	#include <limits>
102	#include <memory>
103	#include <optional>
104	#include <utility>
105	#include <vector>
106
107	using namespace llvm;
108	using namespace llvm::PatternMatch;
109
110	#define DEBUG_TYPE "codegenprepare"
111
112	STATISTIC(NumBlocksElim, "Number of blocks eliminated"); // First of the pass's statistic counters; each pairs a counter name with its description string.
113	STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114	STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115	STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116	"sunken Cmps");
117	STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118	"of sunken Casts");
	// Counters for address-computation sinking into memory instructions.
119	STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120	"computations were sunk");
121	STATISTIC(NumMemoryInstsPhiCreated,
122	"Number of phis created when address "
123	"computations were sunk to memory instructions");
124	STATISTIC(NumMemoryInstsSelectCreated,
125	"Number of select created when address "
126	"computations were sunk to memory instructions");
	// Counters for extension / mask handling around loads.
127	STATISTIC(NumExtsMoved, "Number of [s\|z]ext instructions combined with loads");
128	STATISTIC(NumExtUses, "Number of uses of [s\|z]ext instructions optimized");
129	STATISTIC(NumAndsAdded,
130	"Number of and mask instructions added to form ext loads");
131	STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132	STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133	STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134	STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135	STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
137	static cl::opt<bool> DisableBranchOpts( // Tested in _run before splitBranchCondition and before the terminator-folding cleanup.
138	"disable-cgp-branch-opts", cl::Hidden, cl::init(Val: false),
139	cl::desc ("Disable branch optimizations in CodeGenPrepare"));
140
	// Tested in _run before simplifyOffsetableRelocate is applied to each
	// GCStatepointInst found in the function.
141	static cl::opt<bool>
142	DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(Val: false),
143	cl::desc ("Disable GC optimizations in CodeGenPrepare"));
144
145	static cl::opt<bool>
146	DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147	cl::init(Val: false),
148	cl::desc ("Disable select to branch conversion."));
149
150	static cl::opt<bool>
151	AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(Val: true),
152	cl::desc ("Address sinking in CGP using GEPs."));
153
154	static cl::opt<bool>
155	EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(Val: true),
156	cl::desc ("Enable sinking and/cmp into branches."));
157
158	static cl::opt<bool> DisableStoreExtract(
159	"disable-cgp-store-extract", cl::Hidden, cl::init(Val: false),
160	cl::desc ("Disable store(extract) optimizations in CodeGenPrepare"));
161
162	static cl::opt<bool> StressStoreExtract(
163	"stress-cgp-store-extract", cl::Hidden, cl::init(Val: false),
164	cl::desc ("Stress test store(extract) optimizations in CodeGenPrepare"));
165
166	static cl::opt<bool> DisableExtLdPromotion(
167	"disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(Val: false),
168	cl::desc ("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169	"CodeGenPrepare"));
170
171	static cl::opt<bool> StressExtLdPromotion(
172	"stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(Val: false),
173	cl::desc ("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174	"optimization in CodeGenPrepare"));
175
176	static cl::opt<bool> DisablePreheaderProtect(
177	"disable-preheader-prot", cl::Hidden, cl::init(Val: false),
178	cl::desc ("Disable protection against removing loop preheaders"));
179
	// Consulted in _run when choosing between the "hot", "unlikely" and
	// "unknown" section prefixes.
180	static cl::opt<bool> ProfileGuidedSectionPrefix(
181	"profile-guided-section-prefix", cl::Hidden, cl::init(Val: true),
182	cl::desc ("Use profile info to add section prefix for hot/cold functions"));
183
	// Note: unlike the neighbouring options, no cl::init value is given here.
	// _run only honours it together with PSI->hasPartialSampleProfile().
184	static cl::opt<bool> ProfileUnknownInSpecialSection(
185	"profile-unknown-in-special-section", cl::Hidden,
186	cl::desc ("In profiling mode like sampleFDO, if a function doesn't have "
187	"profile, we cannot tell the function is cold for sure because "
188	"it may be a function newly added without ever being sampled. "
189	"With the flag enabled, compiler can put such profile unknown "
190	"functions into a special section, so runtime system can choose "
191	"to handle it in a different way than .text section, to save "
192	"RAM for example. "));
193
	// Tested first in _run, so it takes priority over ProfileGuidedSectionPrefix
	// when the basic-block-sections profile reports the function as hot.
194	static cl::opt<bool> BBSectionsGuidedSectionPrefix(
195	"bbsections-guided-section-prefix", cl::Hidden, cl::init(Val: true),
196	cl::desc ("Use the basic-block-sections profile to determine the text "
197	"section prefix for hot functions. Functions with "
198	"basic-block-sections profile will be placed in `.text.hot` "
199	"regardless of their FDO profile info. Other functions won't be "
200	"impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201	"profiles."));
202
203	static cl::opt<uint64_t> FreqRatioToSkipMerge( // Threshold for the block-frequency ratio described in the cl::desc text below.
204	"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(Val: `2`),
205	cl::desc ("Skip merging empty blocks if (frequency of empty block) / "
206	"(frequency of destination block) is greater than this ratio"));
207
208	static cl::opt<bool> ForceSplitStore(
209	"force-split-store", cl::Hidden, cl::init(Val: false),
210	cl::desc ("Force store splitting no matter what the target query says."));
211
	// Gates the mergeSExts call in _run's main loop.
212	static cl::opt<bool> EnableTypePromotionMerge(
213	"cgp-type-promotion-merge", cl::Hidden,
214	cl::desc ("Enable merging of redundant sexts when one is dominating"
215	" the other."),
216	cl::init(Val: true));
217
218	static cl::opt<bool> DisableComplexAddrModes(
219	"disable-complex-addr-modes", cl::Hidden, cl::init(Val: false),
220	cl::desc ("Disables combining addressing modes with different parts "
221	"in optimizeMemoryInst."));
222
	// The addr-sink-* options below tune address sinking; each one's effect is
	// described by its own cl::desc string.
223	static cl::opt<bool>
224	AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(Val: false),
225	cl::desc ("Allow creation of Phis in Address sinking."));
226
227	static cl::opt<bool> AddrSinkNewSelects(
228	"addr-sink-new-select", cl::Hidden, cl::init(Val: true),
229	cl::desc ("Allow creation of selects in Address sinking."));
230
231	static cl::opt<bool> AddrSinkCombineBaseReg(
232	"addr-sink-combine-base-reg", cl::Hidden, cl::init(Val: true),
233	cl::desc ("Allow combining of BaseReg field in Address sinking."));
234
235	static cl::opt<bool> AddrSinkCombineBaseGV(
236	"addr-sink-combine-base-gv", cl::Hidden, cl::init(Val: true),
237	cl::desc ("Allow combining of BaseGV field in Address sinking."));
238
239	static cl::opt<bool> AddrSinkCombineBaseOffs(
240	"addr-sink-combine-base-offs", cl::Hidden, cl::init(Val: true),
241	cl::desc ("Allow combining of BaseOffs field in Address sinking."));
242
243	static cl::opt<bool> AddrSinkCombineScaledReg(
244	"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(Val: true),
245	cl::desc ("Allow combining of ScaledReg field in Address sinking."));
246
247	static cl::opt<bool>
248	EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249	cl::init(Val: true),
250	cl::desc ("Enable splitting large offset of GEP."));
251
252	static cl::opt<bool> EnableICMP_EQToICMP_ST(
253	"cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(Val: false),
254	cl::desc ("Enable ICMP_EQ to ICMP_S(L\|G)T conversion."));
255
	// Only read inside an #ifndef NDEBUG region at the end of _run.
256	static cl::opt<bool>
257	VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(Val: false),
258	cl::desc ("Enable BFI update verification for "
259	"CodeGenPrepare."));
260
261	static cl::opt<bool>
262	OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(Val: true),
263	cl::desc ("Enable converting phi types in CodeGenPrepare"));
264
	// _run sets IsHugeFunc when F.size() exceeds this value.
265	static cl::opt<unsigned>
266	HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(Val: `10000`), cl::Hidden,
267	cl::desc ("Least BB number of huge function."));
268
269	static cl::opt<unsigned>
270	MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(Val: `100`),
271	cl::Hidden,
272	cl::desc ("Max number of address users to look at"));
273
274	static cl::opt<bool>
275	DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(Val: false),
276	cl::desc ("Disable elimination of dead PHI nodes."));
277
278	namespace {
279
280	enum ExtType { // Extension kind seen for an instruction; stored in the 2-bit integer field of TypeIsSExt.
281	ZeroExtension, // Zero extension has been seen.
282	SignExtension, // Sign extension has been seen.
283	BothExtension // This extension type is used if we saw sext after
284	// ZeroExtension had been set, or if we saw zext after
285	// SignExtension had been set. It makes the type
286	// information of a promoted instruction invalid.
287	};
288
289	enum ModifyDT { // Reported through the ModifyDT& out-parameters; _run drops its cached DominatorTree on ModifyBBDT.
290	NotModifyDT, // No dominator information was modified.
291	ModifyBBDT, // The basic-block dominator tree was modified.
292	ModifyInstDT // Instruction dominance inside a basic block was modified.
293	// This usually means an instruction was moved, deleted or
294	// inserted in a basic block, so the instructions of that
295	// basic block should be re-iterated.
296	};
297
298	using SetOfInstrs = SmallPtrSet<Instruction *, `16`>; // Set of instruction pointers.
299	using TypeIsSExt = PointerIntPair<Type *, `2`, ExtType>; // A Type* packed together with an ExtType tag.
300	using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>; // Used for CodeGenPrepare::PromotedInsts.
301	using SExts = SmallVector<Instruction *, `16`>; // List of extension instructions.
302	using ValueToSExts = MapVector<Value *, SExts>; // Used for CodeGenPrepare::ValToSExtendedUses.
303
304	class TypePromotionTransaction; // Forward declaration; used by reference in CodeGenPrepare's method signatures.
305
306	class CodeGenPrepare {
	// Worker object behind both pass wrappers: CodeGenPrepareLegacyPass creates
	// one per function and calls _run(), CodeGenPreparePass creates one and
	// calls run(). The legacy wrapper is a friend so that it can assign the
	// private analysis members below directly.
307	friend class CodeGenPrepareLegacyPass;
	// Target and analysis handles. They are assigned in run() or in
	// CodeGenPrepareLegacyPass::runOnFunction before _run() is entered.
308	const TargetMachine TM = nullptr*;
309	const TargetSubtargetInfo SubtargetInfo = nullptr*;
310	const TargetLowering TLI = nullptr*;
311	const TargetRegisterInfo TRI = nullptr*;
312	const TargetTransformInfo TTI = nullptr*;
	// May stay null: _run() tests this pointer before using it.
313	const BasicBlockSectionsProfileReader BBSectionsProfileReader = nullptr*;
314	const TargetLibraryInfo TLInfo = nullptr*;
315	LoopInfo LI = nullptr*;
	// BFI and BPI are owned here: both entry points allocate them with reset()
	// and releaseMemory() drops them.
316	std::unique_ptr<BlockFrequencyInfo> BFI;
317	std::unique_ptr<BranchProbabilityInfo> BPI;
	// NOTE(review): run() fills this from getCachedResult while _run()
	// dereferences it without a null check - confirm the summary is always
	// available at that point.
318	ProfileSummaryInfo PSI = nullptr*;
319
320	/// As we scan instructions optimizing them, this is the next instruction
321	/// to optimize. Transforms that can invalidate this should update it.
322	BasicBlock::iterator CurInstIterator;
323
324	/// Keeps track of non-local addresses that have been sunk into a block.
325	/// This allows us to avoid inserting duplicate code for blocks with
326	/// multiple load/stores of the same address. The usage of WeakTrackingVH
327	/// enables SunkAddrs to be treated as a cache whose entries can be
328	/// invalidated if a sunken address computation has been erased.
329	ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331	/// Keeps track of all instructions inserted for the current function.
332	SetOfInstrs InsertedInsts;
333
334	/// Keeps track of the type of the related instruction before their
335	/// promotion for the current function.
336	InstrToOrigTy PromotedInsts;
337
338	/// Keep track of instructions removed during promotion.
	/// _run() calls deleteValue() on each entry at the end of every iteration
	/// of its main loop and then clears the set.
339	SetOfInstrs RemovedInsts;
340
341	/// Keep track of sext chains based on their initial value.
342	DenseMap<Value , Instruction > SeenChainsForSExt;
343
344	/// Keep track of GEPs accessing the same data structures such as structs or
345	/// arrays that are candidates to be split later because of their large
346	/// size.
347	MapVector<AssertingVH<Value>,
348	SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, `32`>>
349	LargeOffsetGEPMap;
350
351	/// Keep track of new GEP base after splitting the GEPs having large offset.
352	SmallSet<AssertingVH<Value>, `2`> NewGEPBases;
353
354	/// Map serial numbers to Large offset GEPs.
355	DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357	/// Keep track of SExt promoted.
358	ValueToSExts ValToSExtendedUses;
359
360	/// True if the function has the OptSize attribute.
	/// Assigned from F.hasOptSize() at the start of _run().
361	bool OptSize;
362
363	/// DataLayout for the Function being processed.
364	const DataLayout DL = nullptr*;
365
366	/// Building the dominator tree can be expensive, so we only build it
367	/// lazily and update it when required.
	/// Access it through getDT(); _run() resets it when a block-level change
	/// is reported.
368	std::unique_ptr<DominatorTree> DT;
369
370	public:
371	CodeGenPrepare(){};
372	CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373	/// When a huge function is encountered, we need to limit the build time.
	/// Set in _run() by comparing F.size() with HugeFuncThresholdInCGPP.
374	bool IsHugeFunc = false;
375
376	/// FreshBBs works like a worklist: it collects the updated BBs which need
377	/// to be optimized again.
378	/// Note: To keep the build time of this pass in check, when a BB is updated
379	/// in a huge function it has to be inserted into FreshBBs.
380	SmallSet<BasicBlock *, `32`> FreshBBs;
381
	/// Clears InsertedInsts, PromotedInsts and FreshBBs and releases the owned
	/// BPI and BFI objects. The other containers of this class are not touched
	/// here.
382	void releaseMemory() {
383	// Clear per function information.
384	InsertedInsts.clear();
385	PromotedInsts.clear();
386	FreshBBs.clear();
387	BPI.reset();
388	BFI.reset();
389	}
390
	/// New-pass-manager entry point: fetches the analyses from AM, then
	/// forwards to _run().
391	bool run(Function &F, FunctionAnalysisManager &AM);
392
393	private:
	/// Invokes f() and, if the instruction CurInstIterator pointed at was
	/// deleted or replaced meanwhile, restarts at BB->begin() and clears
	/// SunkAddrs.
394	template <typename F>
395	void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396	// Substituting can cause recursive simplifications, which can invalidate
397	// our iterator. Use a WeakTrackingVH to hold onto it in case this
398	// happens.
399	Value CurValue = &CurInstIterator;
400	WeakTrackingVH IterHandle(CurValue);
401
402	f();
403
404	// If the iterator instruction was recursively deleted, start over at the
405	// start of the block.
406	if (IterHandle != CurValue) {
407	CurInstIterator = BB->begin();
408	SunkAddrs.clear();
409	}
410	}
411
412	// Get the DominatorTree, building if necessary.
413	DominatorTree &getDT(Function &F) {
414	if (!DT)
415	DT = std::make_unique<DominatorTree>(args&: F);
416	return *DT;
417	}
418
	// Individual transforms. The bool-returning ones feed the MadeChange /
	// EverMadeChange flags in _run() wherever _run() calls them directly.
419	void removeAllAssertingVHReferences(Value *V);
420	bool eliminateAssumptions(Function &F);
421	bool eliminateFallThrough(Function &F, DominatorTree DT = nullptr*);
422	bool eliminateMostlyEmptyBlocks(Function &F);
423	BasicBlock findDestBlockOfMergeableEmptyBlock(BasicBlock BB);
424	bool canMergeBlocks(const BasicBlock BB, const* BasicBlock DestBB) const*;
425	void eliminateMostlyEmptyBlock(BasicBlock *BB);
426	bool isMergingEmptyBlockProfitable(BasicBlock BB, BasicBlock DestBB,
427	bool isPreheader);
428	bool makeBitReverse(Instruction &I);
429	bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430	bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431	bool optimizeMemoryInst(Instruction MemoryInst, Value Addr, Type *AccessTy,
432	unsigned AddrSpace);
433	bool optimizeGatherScatterInst(Instruction MemoryInst, Value Ptr);
434	bool optimizeInlineAsmInst(CallInst *CS);
435	bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436	bool optimizeExt(Instruction *&I);
437	bool optimizeExtUses(Instruction *I);
438	bool optimizeLoadExt(LoadInst *Load);
439	bool optimizeShiftInst(BinaryOperator *BO);
440	bool optimizeFunnelShift(IntrinsicInst *Fsh);
441	bool optimizeSelectInst(SelectInst *SI);
442	bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443	bool optimizeSwitchType(SwitchInst *SI);
444	bool optimizeSwitchPhiConstants(SwitchInst *SI);
445	bool optimizeSwitchInst(SwitchInst *SI);
446	bool optimizeExtractElementInst(Instruction *Inst);
447	bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448	bool fixupDbgValue(Instruction *I);
449	bool fixupDbgVariableRecord(DbgVariableRecord &I);
450	bool fixupDbgVariableRecordsOnInst(Instruction &I);
451	bool placeDbgValues(Function &F);
452	bool placePseudoProbes(Function &F);
453	bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
454	LoadInst &LI, Instruction &Inst, bool HasPromoted);
455	bool tryToPromoteExts(TypePromotionTransaction &TPT,
456	const SmallVectorImpl<Instruction *> &Exts,
457	SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
458	unsigned CreatedInstsCost = `0`);
459	bool mergeSExts(Function &F);
460	bool splitLargeGEPOffsets();
461	bool optimizePhiType(PHINode Inst, SmallPtrSetImpl<PHINode > &Visited,
462	SmallPtrSetImpl<Instruction *> &DeletedInstrs);
463	bool optimizePhiTypes(Function &F);
464	bool performAddressTypePromotion(
465	Instruction &Inst, bool* AllowPromotionWithoutCommonHeader,
466	bool HasPromoted, TypePromotionTransaction &TPT,
467	SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
468	bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
469	bool simplifyOffsetableRelocate(GCStatepointInst &I);
470
471	bool tryToSinkFreeOperands(Instruction *I);
472	bool replaceMathCmpWithIntrinsic(BinaryOperator BO, Value Arg0, Value *Arg1,
473	CmpInst *Cmp, Intrinsic::ID IID);
474	bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
475	bool optimizeURem(Instruction *Rem);
476	bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477	bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
478	bool unfoldPowerOf2Test(CmpInst *Cmp);
479	void verifyBFIUpdates(Function &F);
	// Shared driver used by both entry points once the analysis members are set.
480	bool _run(Function &F);
481	};
482
483	class CodeGenPrepareLegacyPass : public FunctionPass {
	// Legacy pass-manager wrapper: declares the required analyses and, in
	// runOnFunction, hands them to a CodeGenPrepare instance.
484	public:
485	static char ID; // Pass identification, replacement for typeid
486
	// Registers the pass with the global PassRegistry on construction.
487	CodeGenPrepareLegacyPass() : FunctionPass (ID) {
488	initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
489	}
490
491	bool runOnFunction(Function &F) override;
492
493	StringRef getPassName() const override { return "CodeGen Prepare"; }
494
	// Five analyses are required; the basic-block-sections profile reader is
	// only used if available. No analysis is marked as preserved here.
495	void getAnalysisUsage(AnalysisUsage &AU) const override {
496	// FIXME: When we can selectively preserve passes, preserve the domtree.
497	AU.addRequired<ProfileSummaryInfoWrapperPass>();
498	AU.addRequired<TargetLibraryInfoWrapperPass>();
499	AU.addRequired<TargetPassConfig>();
500	AU.addRequired<TargetTransformInfoWrapperPass>();
501	AU.addRequired<LoopInfoWrapperPass>();
502	AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
503	}
504	};
505
506	} // end anonymous namespace
507
508	char CodeGenPrepareLegacyPass::ID = `0`; // Definition of the static ID member that the constructor passes to FunctionPass.
509
510	bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
	// Legacy-PM entry point. Builds a CodeGenPrepare for this function, fills
	// its analysis members from the legacy analysis wrappers and returns the
	// result of CodeGenPrepare::_run. Returns false without doing anything when
	// skipFunction(F) is true.
511	if (skipFunction(F))
512	return false;
513	auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
514	CodeGenPrepare CGP(TM);
515	CGP.DL = &F.getDataLayout();
516	CGP.SubtargetInfo = TM->getSubtargetImpl(F);
517	CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
518	CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
519	CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
520	CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
521	CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
	// BPI and BFI are computed locally and owned by CGP rather than taken from
	// the pass manager.
522	CGP.BPI.reset(p: new BranchProbabilityInfo (F, *CGP.LI));
523	CGP.BFI.reset(p: new BlockFrequencyInfo (F, CGP.BPI, CGP.LI));
524	CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
	// The profile reader is optional; leave the member null when the wrapper
	// pass is not available.
525	auto BBSPRWP =
526	getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
527	CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
528
529	return CGP._run(F);
530	}
531
532	INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
533	"Optimize for code generation", false, false)
	// The dependencies below are the same six passes that getAnalysisUsage
	// requires or uses if available.
534	INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
535	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
536	INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
537	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
538	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
539	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
540	INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
541	"Optimize for code generation", false, false)
542
543	FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
	// Factory for the legacy-PM wrapper: returns a newly allocated
	// CodeGenPrepareLegacyPass.
544	return new CodeGenPrepareLegacyPass ();
545	}
546
547	PreservedAnalyses CodeGenPreparePass::run(Function &F,
548	FunctionAnalysisManager &AM) {
	// New-PM entry point: runs CodeGenPrepare on F and reports which analyses
	// remain valid afterwards.
549	CodeGenPrepare CGP(TM);
550
551	bool Changed = CGP.run(F, AM);
	// Nothing was modified, so every analysis is still valid.
552	if (!Changed)
553	return PreservedAnalyses::all();
554
	// Otherwise only these three analyses are reported as preserved.
555	PreservedAnalyses PA;
556	PA.preserve<TargetLibraryAnalysis>();
557	PA.preserve<TargetIRAnalysis>();
558	PA.preserve<LoopAnalysis>();
559	return PA;
560	}
561
562	bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
	// Populates the target/analysis members from the function analysis manager
	// and then delegates to _run(F). Returns _run's "changed" result.
563	DL = &F.getDataLayout();
564	SubtargetInfo = TM->getSubtargetImpl(F);
565	TLI = SubtargetInfo->getTargetLowering();
566	TRI = SubtargetInfo->getRegisterInfo();
567	TLInfo = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
568	TTI = &AM.getResult<TargetIRAnalysis>(IR&: F);
569	LI = &AM.getResult<LoopAnalysis>(IR&: F);
	// BPI and BFI are built here and owned by this object instead of being
	// requested from AM.
570	BPI.reset(p: new BranchProbabilityInfo (F, *LI));
571	BFI.reset(p: new BlockFrequencyInfo (F, BPI, LI));
	// PSI and the basic-block-sections reader are only looked up as cached
	// results; neither is computed on demand here.
	// NOTE(review): _run dereferences PSI unconditionally - confirm that the
	// profile summary is always cached before this pass runs.
572	auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
573	PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
574	BBSectionsProfileReader =
575	AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(IR&: F);
576	return _run(F);
577	}
578
579	bool CodeGenPrepare::_run(Function &F) {
	// Shared driver for both pass-manager entry points. In order it: picks a
	// section prefix, bypasses slow divisions, removes assumes and mostly empty
	// blocks, optionally splits branch conditions, iterates optimizeBlock to a
	// fixed point, cleans up dead blocks, simplifies statepoint relocates and
	// finally places debug values and pseudo probes. Returns true if anything
	// in F was changed.
580	bool EverMadeChange = false;
581
582	OptSize = F.hasOptSize();
583	// Use the basic-block-sections profile to promote hot functions to .text.hot
584	// if requested.
585	if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
586	BBSectionsProfileReader->isFunctionHot(FuncName: F.getName())) {
587	F.setSectionPrefix("hot");
588	} else if (ProfileGuidedSectionPrefix) {
589	// The hot attribute overrides profile-count-based hotness, while
590	// profile-count-based hotness overrides the cold attribute.
591	// This is a conservative behavior.
592	if (F.hasFnAttribute(Attribute::Hot) \|\|
593	PSI->isFunctionHotInCallGraph(F: &F, BFI&: *BFI))
594	F.setSectionPrefix("hot");
595	// If PSI shows this function is not hot, we will place the function
596	// into the unlikely section if (1) PSI shows this is a cold function, or
597	// (2) the function has the cold attribute.
598	else if (PSI->isFunctionColdInCallGraph(F: &F, BFI&: *BFI) \|\|
599	F.hasFnAttribute(Attribute::Cold))
600	F.setSectionPrefix("unlikely");
601	else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
602	PSI->isFunctionHotnessUnknown(F))
603	F.setSectionPrefix("unknown");
604	}
605
606	/// This optimization identifies DIV instructions that can be
607	/// profitably bypassed and carried out with a shorter, faster divide.
608	if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
609	const DenseMap<unsigned int, unsigned int> &BypassWidths =
610	TLI->getBypassSlowDivWidths();
611	BasicBlock BB = &F.begin();
612	while (BB != nullptr) {
613	// bypassSlowDivision may create new BBs, but we don't want to reapply the
614	// optimization to those blocks.
615	BasicBlock *Next = BB->getNextNode();
616	if (!llvm::shouldOptimizeForSize(BB, PSI, BFI: BFI.get()))
617	EverMadeChange \|= bypassSlowDivision(BB, BypassWidth: BypassWidths);
618	BB = Next;
619	}
620	}
621
622	// Get rid of @llvm.assume builtins before attempting to eliminate empty
623	// blocks, since there might be blocks that only contain @llvm.assume calls
624	// (plus arguments that we can get rid of).
625	EverMadeChange \|= eliminateAssumptions(F);
626
627	// Eliminate blocks that contain only PHI nodes and an
628	// unconditional branch.
629	EverMadeChange \|= eliminateMostlyEmptyBlocks(F);
630
631	ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
632	if (!DisableBranchOpts)
633	EverMadeChange \|= splitBranchCondition(F, ModifiedDT);
634
635	// Split some critical edges where one of the sources is an indirect branch,
636	// to help generate sane code for PHIs involving such edges.
637	EverMadeChange \|=
638	SplitIndirectBrCriticalEdges(F, /IgnoreBlocksWithoutPHI=/true);
639
640	// If we are optimizing a huge function, we need to consider the build time,
641	// because the basic algorithm's complexity is near O(N!).
642	IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
643
644	// Transformations above may invalidate dominator tree and/or loop info.
645	DT.reset();
646	LI->releaseMemory();
647	LI->analyze(DomTree: getDT(F));
648
	// Fixed-point loop: keep sweeping the function until a full pass makes no
	// change. For huge functions, sweeps after the first one only revisit the
	// blocks recorded in FreshBBs.
649	bool MadeChange = true;
650	bool FuncIterated = false;
651	while (MadeChange) {
652	MadeChange = false;
653
654	for (BasicBlock &BB : llvm::make_early_inc_range(Range&: F)) {
655	if (FuncIterated && !FreshBBs.contains(Ptr: &BB))
656	continue;
657
658	ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
659	bool Changed = optimizeBlock(BB, ModifiedDT&: ModifiedDTOnIteration);
660
	// A block-level CFG change makes the cached dominator tree stale; it is
	// rebuilt lazily by the next getDT() call.
661	if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
662	DT.reset();
663
664	MadeChange \|= Changed;
665	if (IsHugeFunc) {
666	// If the BB is updated, it may still have a chance to be optimized.
667	// This usually happens in sink optimizations.
668	// For example:
669	//
670	// bb0:
671	// %and = and i32 %a, 4
672	// %cmp = icmp eq i32 %and, 0
673	//
674	// If the %cmp is sunk to another BB, the %and gets a chance to sink too.
675	if (Changed)
676	FreshBBs.insert(Ptr: &BB);
677	else if (FuncIterated)
678	FreshBBs.erase(Ptr: &BB);
679	} else {
680	// For small/normal functions, we restart BB iteration if the dominator
681	// tree of the Function was changed.
682	if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
683	break;
684	}
685	}
686	// We have iterated over all the BBs in the function (only relevant for
	// huge functions).
687	FuncIterated = IsHugeFunc;
688
689	if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
690	MadeChange \|= mergeSExts(F);
691	if (!LargeOffsetGEPMap.empty())
692	MadeChange \|= splitLargeGEPOffsets();
693	MadeChange \|= optimizePhiTypes(F);
694
695	if (MadeChange)
696	eliminateFallThrough(F, DT: DT.get());
697
698	#ifndef NDEBUG
699	if (MadeChange && VerifyLoopInfo)
700	LI->verify(DomTree: getDT(F));
701	#endif
702
703	// Really free removed instructions during promotion.
704	for (Instruction *I : RemovedInsts)
705	I->deleteValue();
706
	// Reset the per-iteration bookkeeping before the next sweep.
707	EverMadeChange \|= MadeChange;
708	SeenChainsForSExt.clear();
709	ValToSExtendedUses.clear();
710	RemovedInsts.clear();
711	LargeOffsetGEPMap.clear();
712	LargeOffsetGEPID.clear();
713	}
714
715	NewGEPBases.clear();
716	SunkAddrs.clear();
717
718	if (!DisableBranchOpts) {
719	MadeChange = false;
720	// Use a set vector to get deterministic iteration order. The order the
721	// blocks are removed may affect whether or not PHI nodes in successors
722	// are removed.
723	SmallSetVector<BasicBlock *, `8`> WorkList;
724	for (BasicBlock &BB : F) {
	// Successors are captured before folding so that edges removed by the fold
	// can still be inspected afterwards. MadeChange accumulates across blocks,
	// so once any terminator has been folded the successor check below also
	// runs for every later block.
725	SmallVector<BasicBlock *, `2`> Successors(successors(BB: &BB));
726	MadeChange \|= ConstantFoldTerminator(BB: &BB, DeleteDeadConditions: true);
727	if (!MadeChange)
728	continue;
729
730	for (BasicBlock *Succ : Successors)
731	if (pred_empty(BB: Succ))
732	WorkList.insert(X: Succ);
733	}
734
735	// Delete the dead blocks and any of their dead successors.
736	MadeChange \|= !WorkList.empty();
737	while (!WorkList.empty()) {
738	BasicBlock *BB = WorkList.pop_back_val();
739	SmallVector<BasicBlock *, `2`> Successors(successors(BB));
740
741	DeleteDeadBlock(BB);
742
743	for (BasicBlock *Succ : Successors)
744	if (pred_empty(BB: Succ))
745	WorkList.insert(X: Succ);
746	}
747
748	// Merge pairs of basic blocks with unconditional branches, connected by
749	// a single edge.
750	if (EverMadeChange \|\| MadeChange)
751	MadeChange \|= eliminateFallThrough(F);
752
753	EverMadeChange \|= MadeChange;
754	}
755
756	if (!DisableGCOpts) {
	// Collect the statepoints first and only then simplify them, so the
	// instruction lists are not being walked while relocates are rewritten.
757	SmallVector<GCStatepointInst *, `2`> Statepoints;
758	for (BasicBlock &BB : F)
759	for (Instruction &I : BB)
760	if (auto *SP = dyn_cast<GCStatepointInst>(Val: &I))
761	Statepoints.push_back(Elt: SP);
762	for (auto &I : Statepoints)
763	EverMadeChange \|= simplifyOffsetableRelocate(I&: *I);
764	}
765
766	// Do this last to clean up use-before-def scenarios introduced by other
767	// preparatory transforms.
768	EverMadeChange \|= placeDbgValues(F);
769	EverMadeChange \|= placePseudoProbes(F);
770
771	#ifndef NDEBUG
772	if (VerifyBFIUpdates)
773	verifyBFIUpdates(F);
774	#endif
775
776	return EverMadeChange;
777	}
778
779	bool CodeGenPrepare::eliminateAssumptions(Function &F) {
	// Erases every AssumeInst in F and then recursively deletes its first
	// operand if that has become trivially dead. Returns true if at least one
	// assume was removed.
780	bool MadeChange = false;
781	for (BasicBlock &BB : F) {
782	CurInstIterator = BB.begin();
783	while (CurInstIterator != BB.end()) {
	// Advance the member iterator before touching the instruction so that
	// erasing the instruction does not invalidate it.
784	Instruction I = &(CurInstIterator ++);
785	if (auto *Assume = dyn_cast<AssumeInst>(Val: I)) {
786	MadeChange = true;
	// Remember the condition operand before the assume itself is erased.
787	Value *Operand = Assume->getOperand(i_nocapture: `0`);
788	Assume->eraseFromParent();
789
	// The recursive deletion may remove the instruction CurInstIterator now
	// points at; the helper restarts the block scan in that case.
790	resetIteratorIfInvalidatedWhileCalling(BB: &BB, f: [&]() {
791	RecursivelyDeleteTriviallyDeadInstructions(V: Operand, TLI: TLInfo, MSSAU: nullptr);
792	});
793	}
794	}
795	}
796	return MadeChange;
797	}
798
799	/// An instruction is about to be deleted, so remove all references to it in our
800	/// GEP-tracking data strcutures.
801	void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
802	LargeOffsetGEPMap.erase(Key: V);
803	NewGEPBases.erase(V);
804
805	auto GEP = dyn_cast<GetElementPtrInst>(Val: V);
806	if (!GEP)
807	return;
808
809	LargeOffsetGEPID.erase(Val: GEP);
810
811	auto VecI = LargeOffsetGEPMap.find(Key: GEP->getPointerOperand());
812	if (VecI == LargeOffsetGEPMap.end())
813	return;
814
815	auto &GEPVector = VecI->second;
816	llvm::erase_if(C&: GEPVector, P: [=](auto &Elt) { return Elt.first == GEP; });
817
818	if (GEPVector.empty())
819	LargeOffsetGEPMap.erase(Iterator: VecI);
820	}
821
822	// Verify BFI has been updated correctly by recomputing BFI and comparing them.
823	void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
824	DominatorTree NewDT(F);
825	LoopInfo NewLI(NewDT);
826	BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
827	BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
828	NewBFI.verifyMatch(Other&: *BFI);
829	}
830
831	/// Merge basic blocks which are connected by a single edge, where one of the
832	/// basic blocks has a single successor pointing to the other basic block,
833	/// which has a single predecessor.
834	bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
835	bool Changed = false;
836	// Scan all of the blocks in the function, except for the entry block.
837	// Use a temporary array to avoid iterator being invalidated when
838	// deleting blocks.
839	SmallVector<WeakTrackingVH, `16`> Blocks(
840	llvm::make_pointer_range(Range: llvm::drop_begin(RangeOrContainer&: F)));
841
842	SmallSet<WeakTrackingVH, `16`> Preds;
843	for (auto &Block : Blocks) {
844	auto *BB = cast_or_null<BasicBlock>(Val&: Block);
845	if (!BB)
846	continue;
847	// If the destination block has a single pred, then this is a trivial
848	// edge, just collapse it.
849	BasicBlock *SinglePred = BB->getSinglePredecessor();
850
851	// Don't merge if BB's address is taken.
852	if (!SinglePred \|\| SinglePred == BB \|\| BB->hasAddressTaken())
853	continue;
854
855	// Make an effort to skip unreachable blocks.
856	if (DT && !DT->isReachableFromEntry(A: BB))
857	continue;
858
859	BranchInst *Term = dyn_cast<BranchInst>(Val: SinglePred->getTerminator());
860	if (Term && !Term->isConditional()) {
861	Changed = true;
862	LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
863
864	// Merge BB into SinglePred and delete it.
865	MergeBlockIntoPredecessor(BB, / DTU / nullptr, LI, / MSSAU / nullptr,
866	/ MemDep / nullptr,
867	/ PredecessorWithTwoSuccessors / false, DT);
868	Preds.insert(V: SinglePred);
869
870	if (IsHugeFunc) {
871	// Update FreshBBs to optimize the merged BB.
872	FreshBBs.insert(Ptr: SinglePred);
873	FreshBBs.erase(Ptr: BB);
874	}
875	}
876	}
877
878	// (Repeatedly) merging blocks into their predecessors can create redundant
879	// debug intrinsics.
880	for (const auto &Pred : Preds)
881	if (auto *BB = cast_or_null<BasicBlock>(Val: Pred))
882	RemoveRedundantDbgInstrs(BB);
883
884	return Changed;
885	}
886
887	/// Find a destination block from BB if BB is mergeable empty block.
888	BasicBlock CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock BB) {
889	// If this block doesn't end with an uncond branch, ignore it.
890	BranchInst *BI = dyn_cast<BranchInst>(Val: BB->getTerminator());
891	if (!BI \|\| !BI->isUnconditional())
892	return nullptr;
893
894	// If the instruction before the branch (skipping debug info) isn't a phi
895	// node, then other stuff is happening here.
896	BasicBlock::iterator BBI = BI->getIterator();
897	if (BBI != BB->begin()) {
898	--BBI;
899	while (isa<DbgInfoIntrinsic>(Val: BBI)) {
900	if (BBI == BB->begin())
901	break;
902	--BBI;
903	}
904	if (!isa<DbgInfoIntrinsic>(Val: BBI) && !isa<PHINode>(Val: BBI))
905	return nullptr;
906	}
907
908	// Do not break infinite loops.
909	BasicBlock *DestBB = BI->getSuccessor(i: `0`);
910	if (DestBB == BB)
911	return nullptr;
912
913	if (!canMergeBlocks(BB, DestBB))
914	DestBB = nullptr;
915
916	return DestBB;
917	}
918
919	/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
920	/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
921	/// edges in ways that are non-optimal for isel. Start by eliminating these
922	/// blocks so we can split them the way we want them.
923	bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924	SmallPtrSet<BasicBlock *, `16`> Preheaders;
925	SmallVector<Loop *, `16`> LoopList(LI->begin(), LI->end());
926	while (!LoopList.empty()) {
927	Loop *L = LoopList.pop_back_val();
928	llvm::append_range(C&: LoopList, R&: *L);
929	if (BasicBlock *Preheader = L->getLoopPreheader())
930	Preheaders.insert(Ptr: Preheader);
931	}
932
933	bool MadeChange = false;
934	// Copy blocks into a temporary array to avoid iterator invalidation issues
935	// as we remove them.
936	// Note that this intentionally skips the entry block.
937	SmallVector<WeakTrackingVH, `16`> Blocks;
938	for (auto &Block : llvm::drop_begin(RangeOrContainer&: F)) {
939	// Delete phi nodes that could block deleting other empty blocks.
940	if (!DisableDeletePHIs)
941	MadeChange \|= DeleteDeadPHIs(BB: &Block, TLI: TLInfo);
942	Blocks.push_back(Elt: &Block);
943	}
944
945	for (auto &Block : Blocks) {
946	BasicBlock *BB = cast_or_null<BasicBlock>(Val&: Block);
947	if (!BB)
948	continue;
949	BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
950	if (!DestBB \|\|
951	!isMergingEmptyBlockProfitable(BB, DestBB, isPreheader: Preheaders.count(Ptr: BB)))
952	continue;
953
954	eliminateMostlyEmptyBlock(BB);
955	MadeChange = true;
956	}
957	return MadeChange;
958	}
959
960	bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
961	BasicBlock *DestBB,
962	bool isPreheader) {
963	// Do not delete loop preheaders if doing so would create a critical edge.
964	// Loop preheaders can be good locations to spill registers. If the
965	// preheader is deleted and we create a critical edge, registers may be
966	// spilled in the loop body instead.
967	if (!DisablePreheaderProtect && isPreheader &&
968	!(BB->getSinglePredecessor() &&
969	BB->getSinglePredecessor()->getSingleSuccessor()))
970	return false;
971
972	// Skip merging if the block's successor is also a successor to any callbr
973	// that leads to this block.
974	// FIXME: Is this really needed? Is this a correctness issue?
975	for (BasicBlock *Pred : predecessors(BB)) {
976	if (isa<CallBrInst>(Val: Pred->getTerminator()) &&
977	llvm::is_contained(Range: successors(BB: Pred), Element: DestBB))
978	return false;
979	}
980
981	// Try to skip merging if the unique predecessor of BB is terminated by a
982	// switch or indirect branch instruction, and BB is used as an incoming block
983	// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
984	// add COPY instructions in the predecessor of BB instead of BB (if it is not
985	// merged). Note that the critical edge created by merging such blocks wont be
986	// split in MachineSink because the jump table is not analyzable. By keeping
987	// such empty block (BB), ISel will place COPY instructions in BB, not in the
988	// predecessor of BB.
989	BasicBlock *Pred = BB->getUniquePredecessor();
990	if (!Pred \|\| !(isa<SwitchInst>(Val: Pred->getTerminator()) \|\|
991	isa<IndirectBrInst>(Val: Pred->getTerminator())))
992	return true;
993
994	if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
995	return true;
996
997	// We use a simple cost heuristic which determine skipping merging is
998	// profitable if the cost of skipping merging is less than the cost of
999	// merging : Cost(skipping merging) < Cost(merging BB), where the
1000	// Cost(skipping merging) is Freq(BB) (Cost(Copy) + Cost(Branch)), and*
1001	// the Cost(merging BB) is Freq(Pred) Cost(Copy).*
1002	// Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1003	// Freq(Pred) / Freq(BB) > 2.
1004	// Note that if there are multiple empty blocks sharing the same incoming
1005	// value for the PHIs in the DestBB, we consider them together. In such
1006	// case, Cost(merging BB) will be the sum of their frequencies.
1007
1008	if (!isa<PHINode>(Val: DestBB->begin()))
1009	return true;
1010
1011	SmallPtrSet<BasicBlock *, `16`> SameIncomingValueBBs;
1012
1013	// Find all other incoming blocks from which incoming values of all PHIs in
1014	// DestBB are the same as the ones from BB.
1015	for (BasicBlock *DestBBPred : predecessors(BB: DestBB)) {
1016	if (DestBBPred == BB)
1017	continue;
1018
1019	if (llvm::all_of(Range: DestBB->phis(), P: [&](const PHINode &DestPN) {
1020	return DestPN.getIncomingValueForBlock(BB) ==
1021	DestPN.getIncomingValueForBlock(BB: DestBBPred);
1022	}))
1023	SameIncomingValueBBs.insert(Ptr: DestBBPred);
1024	}
1025
1026	// See if all BB's incoming values are same as the value from Pred. In this
1027	// case, no reason to skip merging because COPYs are expected to be place in
1028	// Pred already.
1029	if (SameIncomingValueBBs.count(Ptr: Pred))
1030	return true;
1031
1032	BlockFrequency PredFreq = BFI ->getBlockFreq(BB: Pred);
1033	BlockFrequency BBFreq = BFI ->getBlockFreq(BB);
1034
1035	for (auto *SameValueBB : SameIncomingValueBBs)
1036	if (SameValueBB->getUniquePredecessor() == Pred &&
1037	DestBB == findDestBlockOfMergeableEmptyBlock(BB: SameValueBB))
1038	BBFreq += BFI ->getBlockFreq(BB: SameValueBB);
1039
1040	std::optional<BlockFrequency> Limit = BBFreq.mul(Factor: FreqRatioToSkipMerge);
1041	return !Limit \|\| PredFreq <= *Limit;
1042	}
1043
1044	/// Return true if we can merge BB into DestBB if there is a single
1045	/// unconditional branch between them, and BB contains no other non-phi
1046	/// instructions.
1047	bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1048	const BasicBlock DestBB) const* {
1049	// We only want to eliminate blocks whose phi nodes are used by phi nodes in
1050	// the successor. If there are more complex condition (e.g. preheaders),
1051	// don't mess around with them.
1052	for (const PHINode &PN : BB->phis()) {
1053	for (const User *U : PN.users()) {
1054	const Instruction *UI = cast<Instruction>(Val: U);
1055	if (UI->getParent() != DestBB \|\| !isa<PHINode>(Val: UI))
1056	return false;
1057	// If User is inside DestBB block and it is a PHINode then check
1058	// incoming value. If incoming value is not from BB then this is
1059	// a complex condition (e.g. preheaders) we want to avoid here.
1060	if (UI->getParent() == DestBB) {
1061	if (const PHINode *UPN = dyn_cast<PHINode>(Val: UI))
1062	for (unsigned I = `0`, E = UPN->getNumIncomingValues(); I != E; ++I) {
1063	Instruction *Insn = dyn_cast<Instruction>(Val: UPN->getIncomingValue(i: I));
1064	if (Insn && Insn->getParent() == BB &&
1065	Insn->getParent() != UPN->getIncomingBlock(i: I))
1066	return false;
1067	}
1068	}
1069	}
1070	}
1071
1072	// If BB and DestBB contain any common predecessors, then the phi nodes in BB
1073	// and DestBB may have conflicting incoming values for the block. If so, we
1074	// can't merge the block.
1075	const PHINode *DestBBPN = dyn_cast<PHINode>(Val: DestBB->begin());
1076	if (!DestBBPN)
1077	return true; // no conflict.
1078
1079	// Collect the preds of BB.
1080	SmallPtrSet<const BasicBlock *, `16`> BBPreds;
1081	if (const PHINode *BBPN = dyn_cast<PHINode>(Val: BB->begin())) {
1082	// It is faster to get preds from a PHI than with pred_iterator.
1083	for (unsigned i = `0`, e = BBPN->getNumIncomingValues(); i != e; ++i)
1084	BBPreds.insert(Ptr: BBPN->getIncomingBlock(i));
1085	} else {
1086	BBPreds.insert_range(R: predecessors(BB));
1087	}
1088
1089	// Walk the preds of DestBB.
1090	for (unsigned i = `0`, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1091	BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1092	if (BBPreds.count(Ptr: Pred)) { // Common predecessor?
1093	for (const PHINode &PN : DestBB->phis()) {
1094	const Value *V1 = PN.getIncomingValueForBlock(BB: Pred);
1095	const Value *V2 = PN.getIncomingValueForBlock(BB);
1096
1097	// If V2 is a phi node in BB, look up what the mapped value will be.
1098	if (const PHINode *V2PN = dyn_cast<PHINode>(Val: V2))
1099	if (V2PN->getParent() == BB)
1100	V2 = V2PN->getIncomingValueForBlock(BB: Pred);
1101
1102	// If there is a conflict, bail out.
1103	if (V1 != V2)
1104	return false;
1105	}
1106	}
1107	}
1108
1109	return true;
1110	}
1111
1112	/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1113	static void replaceAllUsesWith(Value Old, Value New,
1114	SmallSet<BasicBlock *, `32`> &FreshBBs,
1115	bool IsHuge) {
1116	auto *OldI = dyn_cast<Instruction>(Val: Old);
1117	if (OldI) {
1118	for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1119	UI != E; ++UI) {
1120	Instruction User = cast<Instruction>(Val: UI);
1121	if (IsHuge)
1122	FreshBBs.insert(Ptr: User->getParent());
1123	}
1124	}
1125	Old->replaceAllUsesWith(V: New);
1126	}
1127
1128	/// Eliminate a basic block that has only phi's and an unconditional branch in
1129	/// it.
1130	void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1131	BranchInst *BI = cast<BranchInst>(Val: BB->getTerminator());
1132	BasicBlock *DestBB = BI->getSuccessor(i: `0`);
1133
1134	LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1135	<< BB << DestBB);
1136
1137	// If the destination block has a single pred, then this is a trivial edge,
1138	// just collapse it.
1139	if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1140	if (SinglePred != DestBB) {
1141	assert(SinglePred == BB &&
1142	"Single predecessor not the same as predecessor");
1143	// Merge DestBB into SinglePred/BB and delete it.
1144	MergeBlockIntoPredecessor(BB: DestBB);
1145	// Note: BB(=SinglePred) will not be deleted on this path.
1146	// DestBB(=its single successor) is the one that was deleted.
1147	LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1148
1149	if (IsHugeFunc) {
1150	// Update FreshBBs to optimize the merged BB.
1151	FreshBBs.insert(Ptr: SinglePred);
1152	FreshBBs.erase(Ptr: DestBB);
1153	}
1154	return;
1155	}
1156	}
1157
1158	// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1159	// to handle the new incoming edges it is about to have.
1160	for (PHINode &PN : DestBB->phis()) {
1161	// Remove the incoming value for BB, and remember it.
1162	Value InVal = PN.removeIncomingValue(BB, DeletePHIIfEmpty: false*);
1163
1164	// Two options: either the InVal is a phi node defined in BB or it is some
1165	// value that dominates BB.
1166	PHINode *InValPhi = dyn_cast<PHINode>(Val: InVal);
1167	if (InValPhi && InValPhi->getParent() == BB) {
1168	// Add all of the input values of the input PHI as inputs of this phi.
1169	for (unsigned i = `0`, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1170	PN.addIncoming(V: InValPhi->getIncomingValue(i),
1171	BB: InValPhi->getIncomingBlock(i));
1172	} else {
1173	// Otherwise, add one instance of the dominating value for each edge that
1174	// we will be adding.
1175	if (PHINode *BBPN = dyn_cast<PHINode>(Val: BB->begin())) {
1176	for (unsigned i = `0`, e = BBPN->getNumIncomingValues(); i != e; ++i)
1177	PN.addIncoming(V: InVal, BB: BBPN->getIncomingBlock(i));
1178	} else {
1179	for (BasicBlock *Pred : predecessors(BB))
1180	PN.addIncoming(V: InVal, BB: Pred);
1181	}
1182	}
1183	}
1184
1185	// Preserve loop Metadata.
1186	if (BI->hasMetadata(KindID: LLVMContext::MD_loop)) {
1187	for (auto *Pred : predecessors(BB))
1188	Pred->getTerminator()->copyMetadata(SrcInst: *BI, WL: LLVMContext::MD_loop);
1189	}
1190
1191	// The PHIs are now updated, change everything that refers to BB to use
1192	// DestBB and remove BB.
1193	BB->replaceAllUsesWith(V: DestBB);
1194	BB->eraseFromParent();
1195	++NumBlocksElim;
1196
1197	LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1198	}
1199
1200	// Computes a map of base pointer relocation instructions to corresponding
1201	// derived pointer relocation instructions given a vector of all relocate calls
1202	static void computeBaseDerivedRelocateMap(
1203	const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1204	MapVector<GCRelocateInst , SmallVector<GCRelocateInst , `0`>>
1205	&RelocateInstMap) {
1206	// Collect information in two maps: one primarily for locating the base object
1207	// while filling the second map; the second map is the final structure holding
1208	// a mapping between Base and corresponding Derived relocate calls
1209	MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1210	for (auto *ThisRelocate : AllRelocateCalls) {
1211	auto K = std::make_pair(x: ThisRelocate->getBasePtrIndex(),
1212	y: ThisRelocate->getDerivedPtrIndex());
1213	RelocateIdxMap.insert(KV: std::make_pair(x&: K, y&: ThisRelocate));
1214	}
1215	for (auto &Item : RelocateIdxMap) {
1216	std::pair<unsigned, unsigned> Key = Item.first;
1217	if (Key.first == Key.second)
1218	// Base relocation: nothing to insert
1219	continue;
1220
1221	GCRelocateInst *I = Item.second;
1222	auto BaseKey = std::make_pair(x&: Key.first, y&: Key.first);
1223
1224	// We're iterating over RelocateIdxMap so we cannot modify it.
1225	auto MaybeBase = RelocateIdxMap.find(Key: BaseKey);
1226	if (MaybeBase == RelocateIdxMap.end())
1227	// TODO: We might want to insert a new base object relocate and gep off
1228	// that, if there are enough derived object relocates.
1229	continue;
1230
1231	RelocateInstMap [MaybeBase->second].push_back(Elt: I);
1232	}
1233	}
1234
1235	// Accepts a GEP and extracts the operands into a vector provided they're all
1236	// small integer constants
1237	static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1238	SmallVectorImpl<Value *> &OffsetV) {
1239	for (unsigned i = `1`; i < GEP->getNumOperands(); i++) {
1240	// Only accept small constant integer operands
1241	auto *Op = dyn_cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: i));
1242	if (!Op \|\| Op->getZExtValue() > `20`)
1243	return false;
1244	}
1245
1246	for (unsigned i = `1`; i < GEP->getNumOperands(); i++)
1247	OffsetV.push_back(Elt: GEP->getOperand(i_nocapture: i));
1248	return true;
1249	}
1250
1251	// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1252	// replace, computes a replacement, and affects it.
1253	static bool
1254	simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1255	const SmallVectorImpl<GCRelocateInst *> &Targets) {
1256	bool MadeChange = false;
1257	// We must ensure the relocation of derived pointer is defined after
1258	// relocation of base pointer. If we find a relocation corresponding to base
1259	// defined earlier than relocation of base then we move relocation of base
1260	// right before found relocation. We consider only relocation in the same
1261	// basic block as relocation of base. Relocations from other basic block will
1262	// be skipped by optimization and we do not care about them.
1263	for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1264	&*R != RelocatedBase; ++R)
1265	if (auto *RI = dyn_cast<GCRelocateInst>(Val&: R))
1266	if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1267	if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1268	RelocatedBase->moveBefore(InsertPos: RI->getIterator());
1269	MadeChange = true;
1270	break;
1271	}
1272
1273	for (GCRelocateInst *ToReplace : Targets) {
1274	assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1275	"Not relocating a derived object of the original base object");
1276	if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1277	// A duplicate relocate call. TODO: coalesce duplicates.
1278	continue;
1279	}
1280
1281	if (RelocatedBase->getParent() != ToReplace->getParent()) {
1282	// Base and derived relocates are in different basic blocks.
1283	// In this case transform is only valid when base dominates derived
1284	// relocate. However it would be too expensive to check dominance
1285	// for each such relocate, so we skip the whole transformation.
1286	continue;
1287	}
1288
1289	Value *Base = ToReplace->getBasePtr();
1290	auto *Derived = dyn_cast<GetElementPtrInst>(Val: ToReplace->getDerivedPtr());
1291	if (!Derived \|\| Derived->getPointerOperand() != Base)
1292	continue;
1293
1294	SmallVector<Value *, `2`> OffsetV;
1295	if (!getGEPSmallConstantIntOffsetV(GEP: Derived, OffsetV))
1296	continue;
1297
1298	// Create a Builder and replace the target callsite with a gep
1299	assert(RelocatedBase->getNextNode() &&
1300	"Should always have one since it's not a terminator");
1301
1302	// Insert after RelocatedBase
1303	IRBuilder<> Builder(RelocatedBase->getNextNode());
1304	Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1305
1306	// If gc_relocate does not match the actual type, cast it to the right type.
1307	// In theory, there must be a bitcast after gc_relocate if the type does not
1308	// match, and we should reuse it to get the derived pointer. But it could be
1309	// cases like this:
1310	// bb1:
1311	// ...
1312	// %g1 = call coldcc i8 addrspace(1)*
1313	// @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1314	//
1315	// bb2:
1316	// ...
1317	// %g2 = call coldcc i8 addrspace(1)*
1318	// @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1319	//
1320	// merge:
1321	// %p1 = phi i8 addrspace(1) [ %g1, %bb1 ], [ %g2, %bb2 ]*
1322	// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1323	//
1324	// In this case, we can not find the bitcast any more. So we insert a new
1325	// bitcast no matter there is already one or not. In this way, we can handle
1326	// all cases, and the extra bitcast should be optimized away in later
1327	// passes.
1328	Value *ActualRelocatedBase = RelocatedBase;
1329	if (RelocatedBase->getType() != Base->getType()) {
1330	ActualRelocatedBase =
1331	Builder.CreateBitCast(V: RelocatedBase, DestTy: Base->getType());
1332	}
1333	Value *Replacement =
1334	Builder.CreateGEP(Ty: Derived->getSourceElementType(), Ptr: ActualRelocatedBase,
1335	IdxList: ArrayRef(OffsetV));
1336	Replacement->takeName(V: ToReplace);
1337	// If the newly generated derived pointer's type does not match the original
1338	// derived pointer's type, cast the new derived pointer to match it. Same
1339	// reasoning as above.
1340	Value *ActualReplacement = Replacement;
1341	if (Replacement->getType() != ToReplace->getType()) {
1342	ActualReplacement =
1343	Builder.CreateBitCast(V: Replacement, DestTy: ToReplace->getType());
1344	}
1345	ToReplace->replaceAllUsesWith(V: ActualReplacement);
1346	ToReplace->eraseFromParent();
1347
1348	MadeChange = true;
1349	}
1350	return MadeChange;
1351	}
1352
1353	// Turns this:
1354	//
1355	// %base = ...
1356	// %ptr = gep %base + 15
1357	// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1358	// %base' = relocate(%tok, i32 4, i32 4)
1359	// %ptr' = relocate(%tok, i32 4, i32 5)
1360	// %val = load %ptr'
1361	//
1362	// into this:
1363	//
1364	// %base = ...
1365	// %ptr = gep %base + 15
1366	// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1367	// %base' = gc.relocate(%tok, i32 4, i32 4)
1368	// %ptr' = gep %base' + 15
1369	// %val = load %ptr'
1370	bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1371	bool MadeChange = false;
1372	SmallVector<GCRelocateInst *, `2`> AllRelocateCalls;
1373	for (auto *U : I.users())
1374	if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(Val: U))
1375	// Collect all the relocate calls associated with a statepoint
1376	AllRelocateCalls.push_back(Elt: Relocate);
1377
1378	// We need at least one base pointer relocation + one derived pointer
1379	// relocation to mangle
1380	if (AllRelocateCalls.size() < `2`)
1381	return false;
1382
1383	// RelocateInstMap is a mapping from the base relocate instruction to the
1384	// corresponding derived relocate instructions
1385	MapVector<GCRelocateInst , SmallVector<GCRelocateInst , `0`>> RelocateInstMap;
1386	computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1387	if (RelocateInstMap.empty())
1388	return false;
1389
1390	for (auto &Item : RelocateInstMap)
1391	// Item.first is the RelocatedBase to offset against
1392	// Item.second is the vector of Targets to replace
1393	MadeChange = simplifyRelocatesOffABase(RelocatedBase: Item.first, Targets: Item.second);
1394	return MadeChange;
1395	}
1396
1397	/// Sink the specified cast instruction into its user blocks.
1398	static bool SinkCast(CastInst *CI) {
1399	BasicBlock *DefBB = CI->getParent();
1400
1401	/// InsertedCasts - Only insert a cast in each block once.
1402	DenseMap<BasicBlock , CastInst > InsertedCasts;
1403
1404	bool MadeChange = false;
1405	for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1406	UI != E;) {
1407	Use &TheUse = UI.getUse();
1408	Instruction User = cast<Instruction>(Val: UI);
1409
1410	// Figure out which BB this cast is used in. For PHI's this is the
1411	// appropriate predecessor block.
1412	BasicBlock *UserBB = User->getParent();
1413	if (PHINode *PN = dyn_cast<PHINode>(Val: User)) {
1414	UserBB = PN->getIncomingBlock(U: TheUse);
1415	}
1416
1417	// Preincrement use iterator so we don't invalidate it.
1418	++UI;
1419
1420	// The first insertion point of a block containing an EH pad is after the
1421	// pad. If the pad is the user, we cannot sink the cast past the pad.
1422	if (User->isEHPad())
1423	continue;
1424
1425	// If the block selected to receive the cast is an EH pad that does not
1426	// allow non-PHI instructions before the terminator, we can't sink the
1427	// cast.
1428	if (UserBB->getTerminator()->isEHPad())
1429	continue;
1430
1431	// If this user is in the same block as the cast, don't change the cast.
1432	if (UserBB == DefBB)
1433	continue;
1434
1435	// If we have already inserted a cast into this block, use it.
1436	CastInst *&InsertedCast = InsertedCasts [UserBB];
1437
1438	if (!InsertedCast) {
1439	BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1440	assert(InsertPt != UserBB->end());
1441	InsertedCast = cast<CastInst>(Val: CI->clone());
1442	InsertedCast->insertBefore(BB&: *UserBB, InsertPos: InsertPt);
1443	}
1444
1445	// Replace a use of the cast with a use of the new cast.
1446	TheUse = InsertedCast;
1447	MadeChange = true;
1448	++NumCastUses;
1449	}
1450
1451	// If we removed all uses, nuke the cast.
1452	if (CI->use_empty()) {
1453	salvageDebugInfo(I&: *CI);
1454	CI->eraseFromParent();
1455	MadeChange = true;
1456	}
1457
1458	return MadeChange;
1459	}
1460
1461	/// If the specified cast instruction is a noop copy (e.g. it's casting from
1462	/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1463	/// reduce the number of virtual registers that must be created and coalesced.
1464	///
1465	/// Return true if any changes are made.
1466	static bool OptimizeNoopCopyExpression(CastInst CI, const* TargetLowering &TLI,
1467	const DataLayout &DL) {
1468	// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1469	// than sinking only nop casts, but is helpful on some platforms.
1470	if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: CI)) {
1471	if (!TLI.isFreeAddrSpaceCast(SrcAS: ASC->getSrcAddressSpace(),
1472	DestAS: ASC->getDestAddressSpace()))
1473	return false;
1474	}
1475
1476	// If this is a noop copy,
1477	EVT SrcVT = TLI.getValueType(DL, Ty: CI->getOperand(i_nocapture: `0`)->getType());
1478	EVT DstVT = TLI.getValueType(DL, Ty: CI->getType());
1479
1480	// This is an fp<->int conversion?
1481	if (SrcVT.isInteger() != DstVT.isInteger())
1482	return false;
1483
1484	// If this is an extension, it will be a zero or sign extension, which
1485	// isn't a noop.
1486	if (SrcVT.bitsLT(VT: DstVT))
1487	return false;
1488
1489	// If these values will be promoted, find out what they will be promoted
1490	// to. This helps us consider truncates on PPC as noop copies when they
1491	// are.
1492	if (TLI.getTypeAction(Context&: CI->getContext(), VT: SrcVT) ==
1493	TargetLowering::TypePromoteInteger)
1494	SrcVT = TLI.getTypeToTransformTo(Context&: CI->getContext(), VT: SrcVT);
1495	if (TLI.getTypeAction(Context&: CI->getContext(), VT: DstVT) ==
1496	TargetLowering::TypePromoteInteger)
1497	DstVT = TLI.getTypeToTransformTo(Context&: CI->getContext(), VT: DstVT);
1498
1499	// If, after promotion, these are the same types, this is a noop copy.
1500	if (SrcVT != DstVT)
1501	return false;
1502
1503	return SinkCast(CI);
1504	}
1505
1506	// Match a simple increment by constant operation. Note that if a sub is
1507	// matched, the step is negated (as if the step had been canonicalized to
1508	// an add, even though we leave the instruction alone.)
1509	static bool matchIncrement(const Instruction IVInc, Instruction &LHS,
1510	Constant *&Step) {
1511	if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) \|\|
1512	match(IVInc, m_ExtractValue<`0`>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1513	m_Instruction(LHS), m_Constant(Step)))))
1514	return true;
1515	if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) \|\|
1516	match(IVInc, m_ExtractValue<`0`>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1517	m_Instruction(LHS), m_Constant(Step))))) {
1518	Step = ConstantExpr::getNeg(C: Step);
1519	return true;
1520	}
1521	return false;
1522	}
1523
1524	/// If given \p PN is an inductive variable with value IVInc coming from the
1525	/// backedge, and on each iteration it gets increased by Step, return pair
1526	/// <IVInc, Step>. Otherwise, return std::nullopt.
1527	static std::optional<std::pair<Instruction , Constant >>
1528	getIVIncrement(const PHINode PN, const* LoopInfo *LI) {
1529	const Loop *L = LI->getLoopFor(BB: PN->getParent());
1530	if (!L \|\| L->getHeader() != PN->getParent() \|\| !L->getLoopLatch())
1531	return std::nullopt;
1532	auto *IVInc =
1533	dyn_cast<Instruction>(Val: PN->getIncomingValueForBlock(BB: L->getLoopLatch()));
1534	if (!IVInc \|\| LI->getLoopFor(BB: IVInc->getParent()) != L)
1535	return std::nullopt;
1536	Instruction LHS = nullptr*;
1537	Constant Step = nullptr*;
1538	if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1539	return std::make_pair(x&: IVInc, y&: Step);
1540	return std::nullopt;
1541	}
1542
1543	static bool isIVIncrement(const Value V, const* LoopInfo *LI) {
1544	auto *I = dyn_cast<Instruction>(Val: V);
1545	if (!I)
1546	return false;
1547	Instruction LHS = nullptr*;
1548	Constant Step = nullptr*;
1549	if (!matchIncrement(IVInc: I, LHS, Step))
1550	return false;
1551	if (auto *PN = dyn_cast<PHINode>(Val: LHS))
1552	if (auto IVInc = getIVIncrement(PN, LI))
1553	return IVInc ->first == I;
1554	return false;
1555	}
1556
1557	bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1558	Value Arg0, Value Arg1,
1559	CmpInst *Cmp,
1560	Intrinsic::ID IID) {
1561	auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1562	if (!isIVIncrement(V: BO, LI))
1563	return false;
1564	const Loop *L = LI->getLoopFor(BB: BO->getParent());
1565	assert(L && "L should not be null after isIVIncrement()");
1566	// Do not risk on moving increment into a child loop.
1567	if (LI->getLoopFor(BB: Cmp->getParent()) != L)
1568	return false;
1569
1570	// Finally, we need to ensure that the insert point will dominate all
1571	// existing uses of the increment.
1572
1573	auto &DT = getDT(F&: *BO->getParent()->getParent());
1574	if (DT.dominates(A: Cmp->getParent(), B: BO->getParent()))
1575	// If we're moving up the dom tree, all uses are trivially dominated.
1576	// (This is the common case for code produced by LSR.)
1577	return true;
1578
1579	// Otherwise, special case the single use in the phi recurrence.
1580	return BO->hasOneUse() && DT.dominates(A: Cmp->getParent(), B: L->getLoopLatch());
1581	};
1582	if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement (BO)) {
1583	// We used to use a dominator tree here to allow multi-block optimization.
1584	// But that was problematic because:
1585	// 1. It could cause a perf regression by hoisting the math op into the
1586	// critical path.
1587	// 2. It could cause a perf regression by creating a value that was live
1588	// across multiple blocks and increasing register pressure.
1589	// 3. Use of a dominator tree could cause large compile-time regression.
1590	// This is because we recompute the DT on every change in the main CGP
1591	// run-loop. The recomputing is probably unnecessary in many cases, so if
1592	// that was fixed, using a DT here would be ok.
1593	//
1594	// There is one important particular case we still want to handle: if BO is
1595	// the IV increment. Important properties that make it profitable:
1596	// - We can speculate IV increment anywhere in the loop (as long as the
1597	// indvar Phi is its only user);
1598	// - Upon computing Cmp, we effectively compute something equivalent to the
1599	// IV increment (despite it loops differently in the IR). So moving it up
1600	// to the cmp point does not really increase register pressure.
1601	return false;
1602	}
1603
1604	// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1605	if (BO->getOpcode() == Instruction::Add &&
1606	IID == Intrinsic::usub_with_overflow) {
1607	assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1608	Arg1 = ConstantExpr::getNeg(C: cast<Constant>(Val: Arg1));
1609	}
1610
1611	// Insert at the first instruction of the pair.
1612	Instruction InsertPt = nullptr*;
1613	for (Instruction &Iter : *Cmp->getParent()) {
1614	// If BO is an XOR, it is not guaranteed that it comes after both inputs to
1615	// the overflow intrinsic are defined.
1616	if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) \|\| &Iter == Cmp) {
1617	InsertPt = &Iter;
1618	break;
1619	}
1620	}
1621	assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1622
1623	IRBuilder<> Builder(InsertPt);
1624	Value *MathOV = Builder.CreateBinaryIntrinsic(ID: IID, LHS: Arg0, RHS: Arg1);
1625	if (BO->getOpcode() != Instruction::Xor) {
1626	Value *Math = Builder.CreateExtractValue(Agg: MathOV, Idxs: `0`, Name: "math");
1627	replaceAllUsesWith(Old: BO, New: Math, FreshBBs, IsHuge: IsHugeFunc);
1628	} else
1629	assert(BO->hasOneUse() &&
1630	"Patterns with XOr should use the BO only in the compare");
1631	Value *OV = Builder.CreateExtractValue(Agg: MathOV, Idxs: `1`, Name: "ov");
1632	replaceAllUsesWith(Old: Cmp, New: OV, FreshBBs, IsHuge: IsHugeFunc);
1633	Cmp->eraseFromParent();
1634	BO->eraseFromParent();
1635	return true;
1636	}
1637
1638	/// Match special-case patterns that check for unsigned add overflow.
1639	static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1640	BinaryOperator *&Add) {
1641	// Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1642	// Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1643	Value A = Cmp->getOperand(i_nocapture: `0`), B = Cmp->getOperand(i_nocapture: `1`);
1644
1645	// We are not expecting non-canonical/degenerate code. Just bail out.
1646	if (isa<Constant>(Val: A))
1647	return false;
1648
1649	ICmpInst::Predicate Pred = Cmp->getPredicate();
1650	if (Pred == ICmpInst::ICMP_EQ && match(V: B, P: m_AllOnes()))
1651	B = ConstantInt::get(Ty: B->getType(), V: `1`);
1652	else if (Pred == ICmpInst::ICMP_NE && match(V: B, P: m_ZeroInt()))
1653	B = Constant::getAllOnesValue(Ty: B->getType());
1654	else
1655	return false;
1656
1657	// Check the users of the variable operand of the compare looking for an add
1658	// with the adjusted constant.
1659	for (User *U : A->users()) {
1660	if (match(V: U, P: m_Add(L: m_Specific(V: A), R: m_Specific(V: B)))) {
1661	Add = cast<BinaryOperator>(Val: U);
1662	return true;
1663	}
1664	}
1665	return false;
1666	}
1667
1668	/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1669	/// intrinsic. Return true if any changes were made.
1670	bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1671	ModifyDT &ModifiedDT) {
1672	bool EdgeCase = false;
1673	Value A, B;
1674	BinaryOperator *Add;
1675	if (!match(V: Cmp, P: m_UAddWithOverflow(L: m_Value(V&: A), R: m_Value(V&: B), S: m_BinOp(I&: Add)))) {
1676	if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1677	return false;
1678	// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1679	A = Add->getOperand(i_nocapture: `0`);
1680	B = Add->getOperand(i_nocapture: `1`);
1681	EdgeCase = true;
1682	}
1683
1684	if (!TLI->shouldFormOverflowOp(Opcode: ISD::UADDO,
1685	VT: TLI->getValueType(DL: *DL, Ty: Add->getType()),
1686	MathUsed: Add->hasNUsesOrMore(N: EdgeCase ? `1` : `2`)))
1687	return false;
1688
1689	// We don't want to move around uses of condition values this late, so we
1690	// check if it is legal to create the call to the intrinsic in the basic
1691	// block containing the icmp.
1692	if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1693	return false;
1694
1695	if (!replaceMathCmpWithIntrinsic(BO: Add, Arg0: A, Arg1: B, Cmp,
1696	Intrinsic::IID: uadd_with_overflow))
1697	return false;
1698
1699	// Reset callers - do not crash by iterating over a dead instruction.
1700	ModifiedDT = ModifyDT::ModifyInstDT;
1701	return true;
1702	}
1703
1704	bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1705	ModifyDT &ModifiedDT) {
1706	// We are not expecting non-canonical/degenerate code. Just bail out.
1707	Value A = Cmp->getOperand(i_nocapture: `0`), B = Cmp->getOperand(i_nocapture: `1`);
1708	if (isa<Constant>(Val: A) && isa<Constant>(Val: B))
1709	return false;
1710
1711	// Convert (A u> B) to (A u< B) to simplify pattern matching.
1712	ICmpInst::Predicate Pred = Cmp->getPredicate();
1713	if (Pred == ICmpInst::ICMP_UGT) {
1714	std::swap(a&: A, b&: B);
1715	Pred = ICmpInst::ICMP_ULT;
1716	}
1717	// Convert special-case: (A == 0) is the same as (A u< 1).
1718	if (Pred == ICmpInst::ICMP_EQ && match(V: B, P: m_ZeroInt())) {
1719	B = ConstantInt::get(Ty: B->getType(), V: `1`);
1720	Pred = ICmpInst::ICMP_ULT;
1721	}
1722	// Convert special-case: (A != 0) is the same as (0 u< A).
1723	if (Pred == ICmpInst::ICMP_NE && match(V: B, P: m_ZeroInt())) {
1724	std::swap(a&: A, b&: B);
1725	Pred = ICmpInst::ICMP_ULT;
1726	}
1727	if (Pred != ICmpInst::ICMP_ULT)
1728	return false;
1729
1730	// Walk the users of a variable operand of a compare looking for a subtract or
1731	// add with that same operand. Also match the 2nd operand of the compare to
1732	// the add/sub, but that may be a negated constant operand of an add.
1733	Value *CmpVariableOperand = isa<Constant>(Val: A) ? B : A;
1734	BinaryOperator Sub = nullptr*;
1735	for (User *U : CmpVariableOperand->users()) {
1736	// A - B, A u< B --> usubo(A, B)
1737	if (match(V: U, P: m_Sub(L: m_Specific(V: A), R: m_Specific(V: B)))) {
1738	Sub = cast<BinaryOperator>(Val: U);
1739	break;
1740	}
1741
1742	// A + (-C), A u< C (canonicalized form of (sub A, C))
1743	const APInt CmpC, AddC;
1744	if (match(V: U, P: m_Add(L: m_Specific(V: A), R: m_APInt(Res&: AddC))) &&
1745	match(V: B, P: m_APInt(Res&: CmpC)) && AddC == -(CmpC)) {
1746	Sub = cast<BinaryOperator>(Val: U);
1747	break;
1748	}
1749	}
1750	if (!Sub)
1751	return false;
1752
1753	if (!TLI->shouldFormOverflowOp(Opcode: ISD::USUBO,
1754	VT: TLI->getValueType(DL: *DL, Ty: Sub->getType()),
1755	MathUsed: Sub->hasNUsesOrMore(N: `1`)))
1756	return false;
1757
1758	if (!replaceMathCmpWithIntrinsic(BO: Sub, Arg0: Sub->getOperand(i_nocapture: `0`), Arg1: Sub->getOperand(i_nocapture: `1`),
1759	Cmp, Intrinsic::IID: usub_with_overflow))
1760	return false;
1761
1762	// Reset callers - do not crash by iterating over a dead instruction.
1763	ModifiedDT = ModifyDT::ModifyInstDT;
1764	return true;
1765	}
1766
1767	// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1768	// The same transformation exists in DAG combiner, but we repeat it here because
1769	// DAG builder can break the pattern by moving icmp into a successor block.
1770	bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1771	CmpPredicate Pred;
1772	Value *X;
1773	const APInt *C;
1774
1775	// (icmp (ctpop x), c)
1776	if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1777	m_APIntAllowPoison(C))))
1778	return false;
1779
1780	// We're only interested in "is power of 2 [or zero]" patterns.
1781	bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(P: Pred) && *C == `1`;
1782	bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == `2`) \|\|
1783	(Pred == CmpInst::ICMP_UGT && *C == `1`);
1784	if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1785	return false;
1786
1787	// Some targets have better codegen for `ctpop(x) u</u>= 2/1`than for
1788	// `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1789	// and otherwise expand ctpop into a few simple instructions.
1790	Type *OpTy = X->getType();
1791	if (TLI->isCtpopFast(VT: TLI->getValueType(DL: *DL, Ty: OpTy))) {
1792	// Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1793	if (!IsStrictlyPowerOf2Test \|\| !isKnownNonZero(V: Cmp->getOperand(i_nocapture: `0`), Q: *DL))
1794	return false;
1795
1796	// ctpop(x) == 1 -> ctpop(x) u< 2
1797	// ctpop(x) != 1 -> ctpop(x) u> 1
1798	if (Pred == ICmpInst::ICMP_EQ) {
1799	Cmp->setOperand(i_nocapture: `1`, Val_nocapture: ConstantInt::get(Ty: OpTy, V: `2`));
1800	Cmp->setPredicate(ICmpInst::ICMP_ULT);
1801	} else {
1802	Cmp->setPredicate(ICmpInst::ICMP_UGT);
1803	}
1804	return true;
1805	}
1806
1807	Value *NewCmp;
1808	if (IsPowerOf2OrZeroTest \|\|
1809	(IsStrictlyPowerOf2Test && isKnownNonZero(V: Cmp->getOperand(i_nocapture: `0`), Q: *DL))) {
1810	// ctpop(x) u< 2 -> (x & (x - 1)) == 0
1811	// ctpop(x) u> 1 -> (x & (x - 1)) != 0
1812	IRBuilder<> Builder(Cmp);
1813	Value *Sub = Builder.CreateAdd(LHS: X, RHS: Constant::getAllOnesValue(Ty: OpTy));
1814	Value *And = Builder.CreateAnd(LHS: X, RHS: Sub);
1815	CmpInst::Predicate NewPred =
1816	(Pred == CmpInst::ICMP_ULT \|\| Pred == CmpInst::ICMP_EQ)
1817	? CmpInst::ICMP_EQ
1818	: CmpInst::ICMP_NE;
1819	NewCmp = Builder.CreateICmp(P: NewPred, LHS: And, RHS: ConstantInt::getNullValue(Ty: OpTy));
1820	} else {
1821	// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1822	// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1823	IRBuilder<> Builder(Cmp);
1824	Value *Sub = Builder.CreateAdd(LHS: X, RHS: Constant::getAllOnesValue(Ty: OpTy));
1825	Value *Xor = Builder.CreateXor(LHS: X, RHS: Sub);
1826	CmpInst::Predicate NewPred =
1827	Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1828	NewCmp = Builder.CreateICmp(P: NewPred, LHS: Xor, RHS: Sub);
1829	}
1830
1831	Cmp->replaceAllUsesWith(V: NewCmp);
1832	RecursivelyDeleteTriviallyDeadInstructions(V: Cmp);
1833	return true;
1834	}
1835
1836	/// Sink the given CmpInst into user blocks to reduce the number of virtual
1837	/// registers that must be created and coalesced. This is a clear win except on
1838	/// targets with multiple condition code registers (PowerPC), where it might
1839	/// lose; some adjustment may be wanted there.
1840	///
1841	/// Return true if any changes are made.
1842	static bool sinkCmpExpression(CmpInst Cmp, const* TargetLowering &TLI) {
1843	if (TLI.hasMultipleConditionRegisters())
1844	return false;
1845
1846	// Avoid sinking soft-FP comparisons, since this can move them into a loop.
1847	if (TLI.useSoftFloat() && isa<FCmpInst>(Val: Cmp))
1848	return false;
1849
1850	// Only insert a cmp in each block once.
1851	DenseMap<BasicBlock , CmpInst > InsertedCmps;
1852
1853	bool MadeChange = false;
1854	for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1855	UI != E;) {
1856	Use &TheUse = UI.getUse();
1857	Instruction User = cast<Instruction>(Val: UI);
1858
1859	// Preincrement use iterator so we don't invalidate it.
1860	++UI;
1861
1862	// Don't bother for PHI nodes.
1863	if (isa<PHINode>(Val: User))
1864	continue;
1865
1866	// Figure out which BB this cmp is used in.
1867	BasicBlock *UserBB = User->getParent();
1868	BasicBlock *DefBB = Cmp->getParent();
1869
1870	// If this user is in the same block as the cmp, don't change the cmp.
1871	if (UserBB == DefBB)
1872	continue;
1873
1874	// If we have already inserted a cmp into this block, use it.
1875	CmpInst *&InsertedCmp = InsertedCmps [UserBB];
1876
1877	if (!InsertedCmp) {
1878	BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1879	assert(InsertPt != UserBB->end());
1880	InsertedCmp = CmpInst::Create(Op: Cmp->getOpcode(), Pred: Cmp->getPredicate(),
1881	S1: Cmp->getOperand(i_nocapture: `0`), S2: Cmp->getOperand(i_nocapture: `1`), Name: "");
1882	InsertedCmp->insertBefore(BB&: *UserBB, InsertPos: InsertPt);
1883	// Propagate the debug info.
1884	InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1885	}
1886
1887	// Replace a use of the cmp with a use of the new cmp.
1888	TheUse = InsertedCmp;
1889	MadeChange = true;
1890	++NumCmpUses;
1891	}
1892
1893	// If we removed all uses, nuke the cmp.
1894	if (Cmp->use_empty()) {
1895	Cmp->eraseFromParent();
1896	MadeChange = true;
1897	}
1898
1899	return MadeChange;
1900	}
1901
1902	/// For pattern like:
1903	///
1904	/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1905	/// ...
1906	/// DomBB:
1907	/// ...
1908	/// br DomCond, TrueBB, CmpBB
1909	/// CmpBB: (with DomBB being the single predecessor)
1910	/// ...
1911	/// Cmp = icmp eq CmpOp0, CmpOp1
1912	/// ...
1913	///
1914	/// It would use two comparison on targets that lowering of icmp sgt/slt is
1915	/// different from lowering of icmp eq (PowerPC). This function try to convert
1916	/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1917	/// After that, DomCond and Cmp can use the same comparison so reduce one
1918	/// comparison.
1919	///
1920	/// Return true if any changes are made.
1921	static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1922	const TargetLowering &TLI) {
1923	if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1924	return false;
1925
1926	ICmpInst::Predicate Pred = Cmp->getPredicate();
1927	if (Pred != ICmpInst::ICMP_EQ)
1928	return false;
1929
1930	// If icmp eq has users other than BranchInst and SelectInst, converting it to
1931	// icmp slt/sgt would introduce more redundant LLVM IR.
1932	for (User *U : Cmp->users()) {
1933	if (isa<BranchInst>(Val: U))
1934	continue;
1935	if (isa<SelectInst>(Val: U) && cast<SelectInst>(Val: U)->getCondition() == Cmp)
1936	continue;
1937	return false;
1938	}
1939
1940	// This is a cheap/incomplete check for dominance - just match a single
1941	// predecessor with a conditional branch.
1942	BasicBlock *CmpBB = Cmp->getParent();
1943	BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1944	if (!DomBB)
1945	return false;
1946
1947	// We want to ensure that the only way control gets to the comparison of
1948	// interest is that a less/greater than comparison on the same operands is
1949	// false.
1950	Value *DomCond;
1951	BasicBlock TrueBB, FalseBB;
1952	if (!match(V: DomBB->getTerminator(), P: m_Br(C: m_Value(V&: DomCond), T&: TrueBB, F&: FalseBB)))
1953	return false;
1954	if (CmpBB != FalseBB)
1955	return false;
1956
1957	Value CmpOp0 = Cmp->getOperand(i_nocapture: `0`), CmpOp1 = Cmp->getOperand(i_nocapture: `1`);
1958	CmpPredicate DomPred;
1959	if (!match(V: DomCond, P: m_ICmp(Pred&: DomPred, L: m_Specific(V: CmpOp0), R: m_Specific(V: CmpOp1))))
1960	return false;
1961	if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1962	return false;
1963
1964	// Convert the equality comparison to the opposite of the dominating
1965	// comparison and swap the direction for all branch/select users.
1966	// We have conceptually converted:
1967	// Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1968	// to
1969	// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1970	// And similarly for branches.
1971	for (User *U : Cmp->users()) {
1972	if (auto *BI = dyn_cast<BranchInst>(Val: U)) {
1973	assert(BI->isConditional() && "Must be conditional");
1974	BI->swapSuccessors();
1975	continue;
1976	}
1977	if (auto *SI = dyn_cast<SelectInst>(Val: U)) {
1978	// Swap operands
1979	SI->swapValues();
1980	SI->swapProfMetadata();
1981	continue;
1982	}
1983	llvm_unreachable("Must be a branch or a select");
1984	}
1985	Cmp->setPredicate(CmpInst::getSwappedPredicate(pred: DomPred));
1986	return true;
1987	}
1988
1989	/// Many architectures use the same instruction for both subtract and cmp. Try
1990	/// to swap cmp operands to match subtract operations to allow for CSE.
1991	static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1992	Value *Op0 = Cmp->getOperand(i_nocapture: `0`);
1993	Value *Op1 = Cmp->getOperand(i_nocapture: `1`);
1994	if (!Op0->getType()->isIntegerTy() \|\| isa<Constant>(Val: Op0) \|\|
1995	isa<Constant>(Val: Op1) \|\| Op0 == Op1)
1996	return false;
1997
1998	// If a subtract already has the same operands as a compare, swapping would be
1999	// bad. If a subtract has the same operands as a compare but in reverse order,
2000	// then swapping is good.
2001	int GoodToSwap = `0`;
2002	unsigned NumInspected = `0`;
2003	for (const User *U : Op0->users()) {
2004	// Avoid walking many users.
2005	if (++NumInspected > `128`)
2006	return false;
2007	if (match(V: U, P: m_Sub(L: m_Specific(V: Op1), R: m_Specific(V: Op0))))
2008	GoodToSwap++;
2009	else if (match(V: U, P: m_Sub(L: m_Specific(V: Op0), R: m_Specific(V: Op1))))
2010	GoodToSwap--;
2011	}
2012
2013	if (GoodToSwap > `0`) {
2014	Cmp->swapOperands();
2015	return true;
2016	}
2017	return false;
2018	}
2019
2020	static bool foldFCmpToFPClassTest(CmpInst Cmp, const* TargetLowering &TLI,
2021	const DataLayout &DL) {
2022	FCmpInst *FCmp = dyn_cast<FCmpInst>(Val: Cmp);
2023	if (!FCmp)
2024	return false;
2025
2026	// Don't fold if the target offers free fabs and the predicate is legal.
2027	EVT VT = TLI.getValueType(DL, Ty: Cmp->getOperand(i_nocapture: `0`)->getType());
2028	if (TLI.isFAbsFree(VT) &&
2029	TLI.isCondCodeLegal(CC: getFCmpCondCode(Pred: FCmp->getPredicate()),
2030	VT: VT.getSimpleVT()))
2031	return false;
2032
2033	// Reverse the canonicalization if it is a FP class test
2034	auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2035	return ClassTest == fcInf \|\| ClassTest == (fcInf \| fcNan);
2036	};
2037	auto [ClassVal, ClassTest] =
2038	fcmpToClassTest(Pred: FCmp->getPredicate(), F: *FCmp->getParent()->getParent(),
2039	LHS: FCmp->getOperand(i_nocapture: `0`), RHS: FCmp->getOperand(i_nocapture: `1`));
2040	if (!ClassVal)
2041	return false;
2042
2043	if (!ShouldReverseTransform (ClassTest) && !ShouldReverseTransform (~ClassTest))
2044	return false;
2045
2046	IRBuilder<> Builder(Cmp);
2047	Value *IsFPClass = Builder.createIsFPClass(FPNum: ClassVal, Test: ClassTest);
2048	Cmp->replaceAllUsesWith(V: IsFPClass);
2049	RecursivelyDeleteTriviallyDeadInstructions(V: Cmp);
2050	return true;
2051	}
2052
2053	static bool isRemOfLoopIncrementWithLoopInvariant(
2054	Instruction Rem, const* LoopInfo LI, Value &RemAmtOut, Value *&AddInstOut,
2055	Value &AddOffsetOut, PHINode &LoopIncrPNOut) {
2056	Value Incr, RemAmt;
2057	// NB: If RemAmt is a power of 2 it should* have been transformed by now.*
2058	if (!match(V: Rem, P: m_URem(L: m_Value(V&: Incr), R: m_Value(V&: RemAmt))))
2059	return false;
2060
2061	Value AddInst, AddOffset;
2062	// Find out loop increment PHI.
2063	auto *PN = dyn_cast<PHINode>(Val: Incr);
2064	if (PN != nullptr) {
2065	AddInst = nullptr;
2066	AddOffset = nullptr;
2067	} else {
2068	// Search through a NUW add on top of the loop increment.
2069	Value V0, V1;
2070	if (!match(V: Incr, P: m_NUWAdd(L: m_Value(V&: V0), R: m_Value(V&: V1))))
2071	return false;
2072
2073	AddInst = Incr;
2074	PN = dyn_cast<PHINode>(Val: V0);
2075	if (PN != nullptr) {
2076	AddOffset = V1;
2077	} else {
2078	PN = dyn_cast<PHINode>(Val: V1);
2079	AddOffset = V0;
2080	}
2081	}
2082
2083	if (!PN)
2084	return false;
2085
2086	// This isn't strictly necessary, what we really need is one increment and any
2087	// amount of initial values all being the same.
2088	if (PN->getNumIncomingValues() != `2`)
2089	return false;
2090
2091	// Only trivially analyzable loops.
2092	Loop *L = LI->getLoopFor(BB: PN->getParent());
2093	if (!L \|\| !L->getLoopPreheader() \|\| !L->getLoopLatch())
2094	return false;
2095
2096	// Req that the remainder is in the loop
2097	if (!L->contains(Inst: Rem))
2098	return false;
2099
2100	// Only works if the remainder amount is a loop invaraint
2101	if (!L->isLoopInvariant(V: RemAmt))
2102	return false;
2103
2104	// Is the PHI a loop increment?
2105	auto LoopIncrInfo = getIVIncrement(PN, LI);
2106	if (!LoopIncrInfo)
2107	return false;
2108
2109	// We need remainder_amount % increment_amount to be zero. Increment of one
2110	// satisfies that without any special logic and is overwhelmingly the common
2111	// case.
2112	if (!match(V: LoopIncrInfo ->second, P: m_One()))
2113	return false;
2114
2115	// Need the increment to not overflow.
2116	if (!match(V: LoopIncrInfo ->first, P: m_c_NUWAdd(L: m_Specific(V: PN), R: m_Value())))
2117	return false;
2118
2119	// Set output variables.
2120	RemAmtOut = RemAmt;
2121	LoopIncrPNOut = PN;
2122	AddInstOut = AddInst;
2123	AddOffsetOut = AddOffset;
2124
2125	return true;
2126	}
2127
2128	// Try to transform:
2129	//
2130	// for(i = Start; i < End; ++i)
2131	// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2132	//
2133	// ->
2134	//
2135	// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2136	// for(i = Start; i < End; ++i, ++rem)
2137	// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2138	static bool foldURemOfLoopIncrement(Instruction Rem, const* DataLayout *DL,
2139	const LoopInfo *LI,
2140	SmallSet<BasicBlock *, `32`> &FreshBBs,
2141	bool IsHuge) {
2142	Value AddOffset, RemAmt, *AddInst;
2143	PHINode *LoopIncrPN;
2144	if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmtOut&: RemAmt, AddInstOut&: AddInst,
2145	AddOffsetOut&: AddOffset, LoopIncrPNOut&: LoopIncrPN))
2146	return false;
2147
2148	// Only non-constant remainder as the extra IV is probably not profitable
2149	// in that case.
2150	//
2151	// Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2152	// we can rule out register pressure and ensure this `urem` is executed each
2153	// iteration, its probably profitable to handle the const case as well.
2154	//
2155	// Potential TODO(2): Should we have a check for how "nested" this remainder
2156	// operation is? The new code runs every iteration so if the remainder is
2157	// guarded behind unlikely conditions this might not be worth it.
2158	if (match(V: RemAmt, P: m_ImmConstant()))
2159	return false;
2160
2161	Loop *L = LI->getLoopFor(BB: LoopIncrPN->getParent());
2162	Value *Start = LoopIncrPN->getIncomingValueForBlock(BB: L->getLoopPreheader());
2163	// If we have add create initial value for remainder.
2164	// The logic here is:
2165	// (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant
2166	//
2167	// Only proceed if the expression simplifies (otherwise we can't fully
2168	// optimize out the urem).
2169	if (AddInst) {
2170	assert(AddOffset && "We found an add but missing values");
2171	// Without dom-condition/assumption cache we aren't likely to get much out
2172	// of a context instruction.
2173	Start = simplifyAddInst(LHS: Start, RHS: AddOffset,
2174	IsNSW: match(V: AddInst, P: m_NSWAdd(L: m_Value(), R: m_Value())),
2175	/IsNUW=/true, Q: *DL);
2176	if (!Start)
2177	return false;
2178	}
2179
2180	// If we can't fully optimize out the `rem`, skip this transform.
2181	Start = simplifyURemInst(LHS: Start, RHS: RemAmt, Q: *DL);
2182	if (!Start)
2183	return false;
2184
2185	// Create new remainder with induction variable.
2186	Type *Ty = Rem->getType();
2187	IRBuilder<> Builder(Rem->getContext());
2188
2189	Builder.SetInsertPoint(LoopIncrPN);
2190	PHINode *NewRem = Builder.CreatePHI(Ty, NumReservedValues: `2`);
2191
2192	Builder.SetInsertPoint(cast<Instruction>(
2193	Val: LoopIncrPN->getIncomingValueForBlock(BB: L->getLoopLatch())));
2194	// `(add (urem x, y), 1)` is always nuw.
2195	Value *RemAdd = Builder.CreateNUWAdd(LHS: NewRem, RHS: ConstantInt::get(Ty, V: `1`));
2196	Value *RemCmp = Builder.CreateICmp(P: ICmpInst::ICMP_EQ, LHS: RemAdd, RHS: RemAmt);
2197	Value *RemSel =
2198	Builder.CreateSelect(C: RemCmp, True: Constant::getNullValue(Ty), False: RemAdd);
2199
2200	NewRem->addIncoming(V: Start, BB: L->getLoopPreheader());
2201	NewRem->addIncoming(V: RemSel, BB: L->getLoopLatch());
2202
2203	// Insert all touched BBs.
2204	FreshBBs.insert(Ptr: LoopIncrPN->getParent());
2205	FreshBBs.insert(Ptr: L->getLoopLatch());
2206	FreshBBs.insert(Ptr: Rem->getParent());
2207	if (AddInst)
2208	FreshBBs.insert(Ptr: cast<Instruction>(Val: AddInst)->getParent());
2209	replaceAllUsesWith(Old: Rem, New: NewRem, FreshBBs, IsHuge);
2210	Rem->eraseFromParent();
2211	if (AddInst && AddInst->use_empty())
2212	cast<Instruction>(Val: AddInst)->eraseFromParent();
2213	return true;
2214	}
2215
2216	bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2217	if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHuge: IsHugeFunc))
2218	return true;
2219	return false;
2220	}
2221
2222	bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2223	if (sinkCmpExpression(Cmp, TLI: *TLI))
2224	return true;
2225
2226	if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2227	return true;
2228
2229	if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2230	return true;
2231
2232	if (unfoldPowerOf2Test(Cmp))
2233	return true;
2234
2235	if (foldICmpWithDominatingICmp(Cmp, TLI: *TLI))
2236	return true;
2237
2238	if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2239	return true;
2240
2241	if (foldFCmpToFPClassTest(Cmp, TLI: TLI, DL: DL))
2242	return true;
2243
2244	return false;
2245	}
2246
2247	/// Duplicate and sink the given 'and' instruction into user blocks where it is
2248	/// used in a compare to allow isel to generate better code for targets where
2249	/// this operation can be combined.
2250	///
2251	/// Return true if any changes are made.
2252	static bool sinkAndCmp0Expression(Instruction AndI, const* TargetLowering &TLI,
2253	SetOfInstrs &InsertedInsts) {
2254	// Double-check that we're not trying to optimize an instruction that was
2255	// already optimized by some other part of this pass.
2256	assert(!InsertedInsts.count(AndI) &&
2257	"Attempting to optimize already optimized and instruction");
2258	(void)InsertedInsts;
2259
2260	// Nothing to do for single use in same basic block.
2261	if (AndI->hasOneUse() &&
2262	AndI->getParent() == cast<Instruction>(Val: *AndI->user_begin())->getParent())
2263	return false;
2264
2265	// Try to avoid cases where sinking/duplicating is likely to increase register
2266	// pressure.
2267	if (!isa<ConstantInt>(Val: AndI->getOperand(i: `0`)) &&
2268	!isa<ConstantInt>(Val: AndI->getOperand(i: `1`)) &&
2269	AndI->getOperand(i: `0`)->hasOneUse() && AndI->getOperand(i: `1`)->hasOneUse())
2270	return false;
2271
2272	for (auto *U : AndI->users()) {
2273	Instruction *User = cast<Instruction>(Val: U);
2274
2275	// Only sink 'and' feeding icmp with 0.
2276	if (!isa<ICmpInst>(Val: User))
2277	return false;
2278
2279	auto *CmpC = dyn_cast<ConstantInt>(Val: User->getOperand(i: `1`));
2280	if (!CmpC \|\| !CmpC->isZero())
2281	return false;
2282	}
2283
2284	if (!TLI.isMaskAndCmp0FoldingBeneficial(AndI: *AndI))
2285	return false;
2286
2287	LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2288	LLVM_DEBUG(AndI->getParent()->dump());
2289
2290	// Push the 'and' into the same block as the icmp 0. There should only be
2291	// one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2292	// others, so we don't need to keep track of which BBs we insert into.
2293	for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2294	UI != E;) {
2295	Use &TheUse = UI.getUse();
2296	Instruction User = cast<Instruction>(Val: UI);
2297
2298	// Preincrement use iterator so we don't invalidate it.
2299	++UI;
2300
2301	LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2302
2303	// Keep the 'and' in the same place if the use is already in the same block.
2304	Instruction *InsertPt =
2305	User->getParent() == AndI->getParent() ? AndI : User;
2306	Instruction *InsertedAnd = BinaryOperator::Create(
2307	Op: Instruction::And, S1: AndI->getOperand(i: `0`), S2: AndI->getOperand(i: `1`), Name: "",
2308	InsertBefore: InsertPt->getIterator());
2309	// Propagate the debug info.
2310	InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2311
2312	// Replace a use of the 'and' with a use of the new 'and'.
2313	TheUse = InsertedAnd;
2314	++NumAndUses;
2315	LLVM_DEBUG(User->getParent()->dump());
2316	}
2317
2318	// We removed all uses, nuke the and.
2319	AndI->eraseFromParent();
2320	return true;
2321	}
2322
2323	/// Check if the candidates could be combined with a shift instruction, which
2324	/// includes:
2325	/// 1. Truncate instruction
2326	/// 2. And instruction and the imm is a mask of the low bits:
2327	/// imm & (imm+1) == 0
2328	static bool isExtractBitsCandidateUse(Instruction *User) {
2329	if (!isa<TruncInst>(Val: User)) {
2330	if (User->getOpcode() != Instruction::And \|\|
2331	!isa<ConstantInt>(Val: User->getOperand(i: `1`)))
2332	return false;
2333
2334	const APInt &Cimm = cast<ConstantInt>(Val: User->getOperand(i: `1`))->getValue();
2335
2336	if ((Cimm & (Cimm + `1`)).getBoolValue())
2337	return false;
2338	}
2339	return true;
2340	}
2341
2342	/// Sink both shift and truncate instruction to the use of truncate's BB.
2343	static bool
2344	SinkShiftAndTruncate(BinaryOperator ShiftI, Instruction User, ConstantInt *CI,
2345	DenseMap<BasicBlock , BinaryOperator > &InsertedShifts,
2346	const TargetLowering &TLI, const DataLayout &DL) {
2347	BasicBlock *UserBB = User->getParent();
2348	DenseMap<BasicBlock , CastInst > InsertedTruncs;
2349	auto *TruncI = cast<TruncInst>(Val: User);
2350	bool MadeChange = false;
2351
2352	for (Value::user_iterator TruncUI = TruncI->user_begin(),
2353	TruncE = TruncI->user_end();
2354	TruncUI != TruncE;) {
2355
2356	Use &TruncTheUse = TruncUI.getUse();
2357	Instruction TruncUser = cast<Instruction>(Val: TruncUI);
2358	// Preincrement use iterator so we don't invalidate it.
2359
2360	++TruncUI;
2361
2362	int ISDOpcode = TLI.InstructionOpcodeToISD(Opcode: TruncUser->getOpcode());
2363	if (!ISDOpcode)
2364	continue;
2365
2366	// If the use is actually a legal node, there will not be an
2367	// implicit truncate.
2368	// FIXME: always querying the result type is just an
2369	// approximation; some nodes' legality is determined by the
2370	// operand or other means. There's no good way to find out though.
2371	if (TLI.isOperationLegalOrCustom(
2372	Op: ISDOpcode, VT: TLI.getValueType(DL, Ty: TruncUser->getType(), AllowUnknown: true)))
2373	continue;
2374
2375	// Don't bother for PHI nodes.
2376	if (isa<PHINode>(Val: TruncUser))
2377	continue;
2378
2379	BasicBlock *TruncUserBB = TruncUser->getParent();
2380
2381	if (UserBB == TruncUserBB)
2382	continue;
2383
2384	BinaryOperator *&InsertedShift = InsertedShifts [TruncUserBB];
2385	CastInst *&InsertedTrunc = InsertedTruncs [TruncUserBB];
2386
2387	if (!InsertedShift && !InsertedTrunc) {
2388	BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2389	assert(InsertPt != TruncUserBB->end());
2390	// Sink the shift
2391	if (ShiftI->getOpcode() == Instruction::AShr)
2392	InsertedShift =
2393	BinaryOperator::CreateAShr(V1: ShiftI->getOperand(i_nocapture: `0`), V2: CI, Name: "");
2394	else
2395	InsertedShift =
2396	BinaryOperator::CreateLShr(V1: ShiftI->getOperand(i_nocapture: `0`), V2: CI, Name: "");
2397	InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2398	InsertedShift->insertBefore(BB&: *TruncUserBB, InsertPos: InsertPt);
2399
2400	// Sink the trunc
2401	BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2402	TruncInsertPt ++;
2403	// It will go ahead of any debug-info.
2404	TruncInsertPt.setHeadBit(true);
2405	assert(TruncInsertPt != TruncUserBB->end());
2406
2407	InsertedTrunc = CastInst::Create(TruncI->getOpcode(), S: InsertedShift,
2408	Ty: TruncI->getType(), Name: "");
2409	InsertedTrunc->insertBefore(BB&: *TruncUserBB, InsertPos: TruncInsertPt);
2410	InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2411
2412	MadeChange = true;
2413
2414	TruncTheUse = InsertedTrunc;
2415	}
2416	}
2417	return MadeChange;
2418	}
2419
2420	/// Sink the shift right* instruction into user blocks if the uses could*
2421	/// potentially be combined with this shift instruction and generate BitExtract
2422	/// instruction. It will only be applied if the architecture supports BitExtract
2423	/// instruction. Here is an example:
2424	/// BB1:
2425	/// %x.extract.shift = lshr i64 %arg1, 32
2426	/// BB2:
2427	/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2428	/// ==>
2429	///
2430	/// BB2:
2431	/// %x.extract.shift.1 = lshr i64 %arg1, 32
2432	/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2433	///
2434	/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2435	/// instruction.
2436	/// Return true if any changes are made.
2437	static bool OptimizeExtractBits(BinaryOperator ShiftI, ConstantInt CI,
2438	const TargetLowering &TLI,
2439	const DataLayout &DL) {
2440	BasicBlock *DefBB = ShiftI->getParent();
2441
2442	/// Only insert instructions in each block once.
2443	DenseMap<BasicBlock , BinaryOperator > InsertedShifts;
2444
2445	bool shiftIsLegal = TLI.isTypeLegal(VT: TLI.getValueType(DL, Ty: ShiftI->getType()));
2446
2447	bool MadeChange = false;
2448	for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2449	UI != E;) {
2450	Use &TheUse = UI.getUse();
2451	Instruction User = cast<Instruction>(Val: UI);
2452	// Preincrement use iterator so we don't invalidate it.
2453	++UI;
2454
2455	// Don't bother for PHI nodes.
2456	if (isa<PHINode>(Val: User))
2457	continue;
2458
2459	if (!isExtractBitsCandidateUse(User))
2460	continue;
2461
2462	BasicBlock *UserBB = User->getParent();
2463
2464	if (UserBB == DefBB) {
2465	// If the shift and truncate instruction are in the same BB. The use of
2466	// the truncate(TruncUse) may still introduce another truncate if not
2467	// legal. In this case, we would like to sink both shift and truncate
2468	// instruction to the BB of TruncUse.
2469	// for example:
2470	// BB1:
2471	// i64 shift.result = lshr i64 opnd, imm
2472	// trunc.result = trunc shift.result to i16
2473	//
2474	// BB2:
2475	// ----> We will have an implicit truncate here if the architecture does
2476	// not have i16 compare.
2477	// cmp i16 trunc.result, opnd2
2478	//
2479	if (isa<TruncInst>(Val: User) &&
2480	shiftIsLegal
2481	// If the type of the truncate is legal, no truncate will be
2482	// introduced in other basic blocks.
2483	&& (!TLI.isTypeLegal(VT: TLI.getValueType(DL, Ty: User->getType()))))
2484	MadeChange =
2485	SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2486
2487	continue;
2488	}
2489	// If we have already inserted a shift into this block, use it.
2490	BinaryOperator *&InsertedShift = InsertedShifts [UserBB];
2491
2492	if (!InsertedShift) {
2493	BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2494	assert(InsertPt != UserBB->end());
2495
2496	if (ShiftI->getOpcode() == Instruction::AShr)
2497	InsertedShift =
2498	BinaryOperator::CreateAShr(V1: ShiftI->getOperand(i_nocapture: `0`), V2: CI, Name: "");
2499	else
2500	InsertedShift =
2501	BinaryOperator::CreateLShr(V1: ShiftI->getOperand(i_nocapture: `0`), V2: CI, Name: "");
2502	InsertedShift->insertBefore(BB&: *UserBB, InsertPos: InsertPt);
2503	InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2504
2505	MadeChange = true;
2506	}
2507
2508	// Replace a use of the shift with a use of the new shift.
2509	TheUse = InsertedShift;
2510	}
2511
2512	// If we removed all uses, or there are none, nuke the shift.
2513	if (ShiftI->use_empty()) {
2514	salvageDebugInfo(I&: *ShiftI);
2515	ShiftI->eraseFromParent();
2516	MadeChange = true;
2517	}
2518
2519	return MadeChange;
2520	}
2521
2522	/// If counting leading or trailing zeros is an expensive operation and a zero
2523	/// input is defined, add a check for zero to avoid calling the intrinsic.
2524	///
2525	/// We want to transform:
2526	/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2527	///
2528	/// into:
2529	/// entry:
2530	/// %cmpz = icmp eq i64 %A, 0
2531	/// br i1 %cmpz, label %cond.end, label %cond.false
2532	/// cond.false:
2533	/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2534	/// br label %cond.end
2535	/// cond.end:
2536	/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2537	///
2538	/// If the transform is performed, return true and set ModifiedDT to true.
2539	static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2540	LoopInfo &LI,
2541	const TargetLowering *TLI,
2542	const DataLayout *DL, ModifyDT &ModifiedDT,
2543	SmallSet<BasicBlock *, `32`> &FreshBBs,
2544	bool IsHugeFunc) {
2545	// If a zero input is undefined, it doesn't make sense to despeculate that.
2546	if (match(V: CountZeros->getOperand(i_nocapture: `1`), P: m_One()))
2547	return false;
2548
2549	// If it's cheap to speculate, there's nothing to do.
2550	Type *Ty = CountZeros->getType();
2551	auto IntrinsicID = CountZeros->getIntrinsicID();
2552	if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) \|\|
2553	(IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2554	return false;
2555
2556	// Only handle scalar cases. Anything else requires too much work.
2557	unsigned SizeInBits = Ty->getScalarSizeInBits();
2558	if (Ty->isVectorTy())
2559	return false;
2560
2561	// Bail if the value is never zero.
2562	Use &Op = CountZeros->getOperandUse(i: `0`);
2563	if (isKnownNonZero(V: Op, Q: *DL))
2564	return false;
2565
2566	// The intrinsic will be sunk behind a compare against zero and branch.
2567	BasicBlock *StartBlock = CountZeros->getParent();
2568	BasicBlock *CallBlock = StartBlock->splitBasicBlock(I: CountZeros, BBName: "cond.false");
2569	if (IsHugeFunc)
2570	FreshBBs.insert(Ptr: CallBlock);
2571
2572	// Create another block after the count zero intrinsic. A PHI will be added
2573	// in this block to select the result of the intrinsic or the bit-width
2574	// constant if the input to the intrinsic is zero.
2575	BasicBlock::iterator SplitPt = std::next(x: BasicBlock::iterator (CountZeros));
2576	// Any debug-info after CountZeros should not be included.
2577	SplitPt.setHeadBit(true);
2578	BasicBlock *EndBlock = CallBlock->splitBasicBlock(I: SplitPt, BBName: "cond.end");
2579	if (IsHugeFunc)
2580	FreshBBs.insert(Ptr: EndBlock);
2581
2582	// Update the LoopInfo. The new blocks are in the same loop as the start
2583	// block.
2584	if (Loop *L = LI.getLoopFor(BB: StartBlock)) {
2585	L->addBasicBlockToLoop(NewBB: CallBlock, LI);
2586	L->addBasicBlockToLoop(NewBB: EndBlock, LI);
2587	}
2588
2589	// Set up a builder to create a compare, conditional branch, and PHI.
2590	IRBuilder<> Builder(CountZeros->getContext());
2591	Builder.SetInsertPoint(StartBlock->getTerminator());
2592	Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2593
2594	// Replace the unconditional branch that was created by the first split with
2595	// a compare against zero and a conditional branch.
2596	Value *Zero = Constant::getNullValue(Ty);
2597	// Avoid introducing branch on poison. This also replaces the ctz operand.
2598	if (!isGuaranteedNotToBeUndefOrPoison(V: Op))
2599	Op = Builder.CreateFreeze(V: Op, Name: Op ->getName() + ".fr");
2600	Value *Cmp = Builder.CreateICmpEQ(LHS: Op, RHS: Zero, Name: "cmpz");
2601	Builder.CreateCondBr(Cond: Cmp, True: EndBlock, False: CallBlock);
2602	StartBlock->getTerminator()->eraseFromParent();
2603
2604	// Create a PHI in the end block to select either the output of the intrinsic
2605	// or the bit width of the operand.
2606	Builder.SetInsertPoint(TheBB: EndBlock, IP: EndBlock->begin());
2607	PHINode *PN = Builder.CreatePHI(Ty, NumReservedValues: `2`, Name: "ctz");
2608	replaceAllUsesWith(Old: CountZeros, New: PN, FreshBBs, IsHuge: IsHugeFunc);
2609	Value *BitWidth = Builder.getInt(AI: APInt (SizeInBits, SizeInBits));
2610	PN->addIncoming(V: BitWidth, BB: StartBlock);
2611	PN->addIncoming(V: CountZeros, BB: CallBlock);
2612
2613	// We are explicitly handling the zero case, so we can set the intrinsic's
2614	// undefined zero argument to 'true'. This will also prevent reprocessing the
2615	// intrinsic; we only despeculate when a zero input is defined.
2616	CountZeros->setArgOperand(i: `1`, v: Builder.getTrue());
2617	ModifiedDT = ModifyDT::ModifyBBDT;
2618	return true;
2619	}
2620
2621	bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2622	BasicBlock *BB = CI->getParent();
2623
2624	// Lower inline assembly if we can.
2625	// If we found an inline asm expession, and if the target knows how to
2626	// lower it to normal LLVM code, do so now.
2627	if (CI->isInlineAsm()) {
2628	if (TLI->ExpandInlineAsm(CI)) {
2629	// Avoid invalidating the iterator.
2630	CurInstIterator = BB->begin();
2631	// Avoid processing instructions out of order, which could cause
2632	// reuse before a value is defined.
2633	SunkAddrs.clear();
2634	return true;
2635	}
2636	// Sink address computing for memory operands into the block.
2637	if (optimizeInlineAsmInst(CS: CI))
2638	return true;
2639	}
2640
2641	// Align the pointer arguments to this call if the target thinks it's a good
2642	// idea
2643	unsigned MinSize;
2644	Align PrefAlign;
2645	if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2646	for (auto &Arg : CI->args()) {
2647	// We want to align both objects whose address is used directly and
2648	// objects whose address is used in casts and GEPs, though it only makes
2649	// sense for GEPs if the offset is a multiple of the desired alignment and
2650	// if size - offset meets the size threshold.
2651	if (!Arg ->getType()->isPointerTy())
2652	continue;
2653	APInt Offset(DL->getIndexSizeInBits(
2654	AS: cast<PointerType>(Val: Arg ->getType())->getAddressSpace()),
2655	`0`);
2656	Value Val = Arg ->stripAndAccumulateInBoundsConstantOffsets(DL: DL, Offset);
2657	uint64_t Offset2 = Offset.getLimitedValue();
2658	if (!isAligned(Lhs: PrefAlign, SizeInBytes: Offset2))
2659	continue;
2660	AllocaInst *AI;
2661	if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2662	DL->getTypeAllocSize(Ty: AI->getAllocatedType()) >= MinSize + Offset2)
2663	AI->setAlignment(PrefAlign);
2664	// Global variables can only be aligned if they are defined in this
2665	// object (i.e. they are uniquely initialized in this object), and
2666	// over-aligning global variables that have an explicit section is
2667	// forbidden.
2668	GlobalVariable *GV;
2669	if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2670	GV->getPointerAlignment(DL: *DL) < PrefAlign &&
2671	DL->getTypeAllocSize(Ty: GV->getValueType()) >= MinSize + Offset2)
2672	GV->setAlignment(PrefAlign);
2673	}
2674	}
2675	// If this is a memcpy (or similar) then we may be able to improve the
2676	// alignment.
2677	if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Val: CI)) {
2678	Align DestAlign = getKnownAlignment(V: MI->getDest(), DL: *DL);
2679	MaybeAlign MIDestAlign = MI->getDestAlign();
2680	if (!MIDestAlign \|\| DestAlign > *MIDestAlign)
2681	MI->setDestAlignment(DestAlign);
2682	if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Val: MI)) {
2683	MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2684	Align SrcAlign = getKnownAlignment(V: MTI->getSource(), DL: *DL);
2685	if (!MTISrcAlign \|\| SrcAlign > *MTISrcAlign)
2686	MTI->setSourceAlignment(SrcAlign);
2687	}
2688	}
2689
2690	// If we have a cold call site, try to sink addressing computation into the
2691	// cold block. This interacts with our handling for loads and stores to
2692	// ensure that we can fold all uses of a potential addressing computation
2693	// into their uses. TODO: generalize this to work over profiling data
2694	if (CI->hasFnAttr(Attribute::Cold) &&
2695	!llvm::shouldOptimizeForSize(BB, PSI, BFI: BFI.get()))
2696	for (auto &Arg : CI->args()) {
2697	if (!Arg ->getType()->isPointerTy())
2698	continue;
2699	unsigned AS = Arg ->getType()->getPointerAddressSpace();
2700	if (optimizeMemoryInst(MemoryInst: CI, Addr: Arg, AccessTy: Arg ->getType(), AddrSpace: AS))
2701	return true;
2702	}
2703
2704	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: CI);
2705	if (II) {
2706	switch (II->getIntrinsicID()) {
2707	default:
2708	break;
2709	case Intrinsic::assume:
2710	llvm_unreachable("llvm.assume should have been removed already");
2711	case Intrinsic::allow_runtime_check:
2712	case Intrinsic::allow_ubsan_check:
2713	case Intrinsic::experimental_widenable_condition: {
2714	// Give up on future widening opportunities so that we can fold away dead
2715	// paths and merge blocks before going into block-local instruction
2716	// selection.
2717	if (II->use_empty()) {
2718	II->eraseFromParent();
2719	return true;
2720	}
2721	Constant *RetVal = ConstantInt::getTrue(Context&: II->getContext());
2722	resetIteratorIfInvalidatedWhileCalling(BB, f: [&]() {
2723	replaceAndRecursivelySimplify(I: CI, SimpleV: RetVal, TLI: TLInfo, DT: nullptr);
2724	});
2725	return true;
2726	}
2727	case Intrinsic::objectsize:
2728	llvm_unreachable("llvm.objectsize.* should have been lowered already");
2729	case Intrinsic::is_constant:
2730	llvm_unreachable("llvm.is.constant.* should have been lowered already");
2731	case Intrinsic::aarch64_stlxr:
2732	case Intrinsic::aarch64_stxr: {
2733	ZExtInst *ExtVal = dyn_cast<ZExtInst>(Val: CI->getArgOperand(i: `0`));
2734	if (!ExtVal \|\| !ExtVal->hasOneUse() \|\|
2735	ExtVal->getParent() == CI->getParent())
2736	return false;
2737	// Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2738	ExtVal->moveBefore(InsertPos: CI->getIterator());
2739	// Mark this instruction as "inserted by CGP", so that other
2740	// optimizations don't touch it.
2741	InsertedInsts.insert(Ptr: ExtVal);
2742	return true;
2743	}
2744
2745	case Intrinsic::launder_invariant_group:
2746	case Intrinsic::strip_invariant_group: {
2747	Value *ArgVal = II->getArgOperand(i: `0`);
2748	auto it = LargeOffsetGEPMap.find(Key: II);
2749	if (it != LargeOffsetGEPMap.end()) {
2750	// Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2751	// Make sure not to have to deal with iterator invalidation
2752	// after possibly adding ArgVal to LargeOffsetGEPMap.
2753	auto GEPs = std::move(it->second);
2754	LargeOffsetGEPMap [ArgVal].append(in_start: GEPs.begin(), in_end: GEPs.end());
2755	LargeOffsetGEPMap.erase(Key: II);
2756	}
2757
2758	replaceAllUsesWith(Old: II, New: ArgVal, FreshBBs, IsHuge: IsHugeFunc);
2759	II->eraseFromParent();
2760	return true;
2761	}
2762	case Intrinsic::cttz:
2763	case Intrinsic::ctlz:
2764	// If counting zeros is expensive, try to avoid it.
2765	return despeculateCountZeros(CountZeros: II, LI&: *LI, TLI, DL, ModifiedDT, FreshBBs,
2766	IsHugeFunc);
2767	case Intrinsic::fshl:
2768	case Intrinsic::fshr:
2769	return optimizeFunnelShift(Fsh: II);
2770	case Intrinsic::dbg_assign:
2771	case Intrinsic::dbg_value:
2772	return fixupDbgValue(I: II);
2773	case Intrinsic::masked_gather:
2774	return optimizeGatherScatterInst(MemoryInst: II, Ptr: II->getArgOperand(i: `0`));
2775	case Intrinsic::masked_scatter:
2776	return optimizeGatherScatterInst(MemoryInst: II, Ptr: II->getArgOperand(i: `1`));
2777	}
2778
2779	SmallVector<Value *, `2`> PtrOps;
2780	Type *AccessTy;
2781	if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2782	while (!PtrOps.empty()) {
2783	Value *PtrVal = PtrOps.pop_back_val();
2784	unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2785	if (optimizeMemoryInst(MemoryInst: II, Addr: PtrVal, AccessTy, AddrSpace: AS))
2786	return true;
2787	}
2788	}
2789
2790	// From here on out we're working with named functions.
2791	auto *Callee = CI->getCalledFunction();
2792	if (!Callee)
2793	return false;
2794
2795	// Lower all default uses of _chk calls. This is very similar
2796	// to what InstCombineCalls does, but here we are only lowering calls
2797	// to fortified library functions (e.g. __memcpy_chk) that have the default
2798	// "don't know" as the objectsize. Anything else should be left alone.
2799	FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2800	IRBuilder<> Builder(CI);
2801	if (Value *V = Simplifier.optimizeCall(CI, B&: Builder)) {
2802	replaceAllUsesWith(Old: CI, New: V, FreshBBs, IsHuge: IsHugeFunc);
2803	CI->eraseFromParent();
2804	return true;
2805	}
2806
2807	// SCCP may have propagated, among other things, C++ static variables across
2808	// calls. If this happens to be the case, we may want to undo it in order to
2809	// avoid redundant pointer computation of the constant, as the function method
2810	// returning the constant needs to be executed anyways.
2811	auto GetUniformReturnValue = [](const Function F) -> GlobalVariable {
2812	if (!F->getReturnType()->isPointerTy())
2813	return nullptr;
2814
2815	GlobalVariable UniformValue = nullptr*;
2816	for (auto &BB : *F) {
2817	if (auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator())) {
2818	if (auto *V = dyn_cast<GlobalVariable>(Val: RI->getReturnValue())) {
2819	if (!UniformValue)
2820	UniformValue = V;
2821	else if (V != UniformValue)
2822	return nullptr;
2823	} else {
2824	return nullptr;
2825	}
2826	}
2827	}
2828
2829	return UniformValue;
2830	};
2831
2832	if (Callee->hasExactDefinition()) {
2833	if (GlobalVariable *RV = GetUniformReturnValue (Callee)) {
2834	bool MadeChange = false;
2835	for (Use &U : make_early_inc_range(Range: RV->uses())) {
2836	auto *I = dyn_cast<Instruction>(Val: U.getUser());
2837	if (!I \|\| I->getParent() != CI->getParent()) {
2838	// Limit to the same basic block to avoid extending the call-site live
2839	// range, which otherwise could increase register pressure.
2840	continue;
2841	}
2842	if (CI->comesBefore(Other: I)) {
2843	U.set(CI);
2844	MadeChange = true;
2845	}
2846	}
2847
2848	return MadeChange;
2849	}
2850	}
2851
2852	return false;
2853	}
2854
2855	static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2856	const CallInst *CI) {
2857	assert(CI && CI->use_empty());
2858
2859	if (const auto *II = dyn_cast<IntrinsicInst>(Val: CI))
2860	switch (II->getIntrinsicID()) {
2861	case Intrinsic::memset:
2862	case Intrinsic::memcpy:
2863	case Intrinsic::memmove:
2864	return true;
2865	default:
2866	return false;
2867	}
2868
2869	LibFunc LF;
2870	Function *Callee = CI->getCalledFunction();
2871	if (Callee && TLInfo && TLInfo->getLibFunc(FDecl: *Callee, F&: LF))
2872	switch (LF) {
2873	case LibFunc_strcpy:
2874	case LibFunc_strncpy:
2875	case LibFunc_strcat:
2876	case LibFunc_strncat:
2877	return true;
2878	default:
2879	return false;
2880	}
2881
2882	return false;
2883	}
2884
2885	/// Look for opportunities to duplicate return instructions to the predecessor
2886	/// to enable tail call optimizations. The case it is currently looking for is
2887	/// the following one. Known intrinsics or library function that may be tail
2888	/// called are taken into account as well.
2889	/// @code
2890	/// bb0:
2891	/// %tmp0 = tail call i32 @f0()
2892	/// br label %return
2893	/// bb1:
2894	/// %tmp1 = tail call i32 @f1()
2895	/// br label %return
2896	/// bb2:
2897	/// %tmp2 = tail call i32 @f2()
2898	/// br label %return
2899	/// return:
2900	/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2901	/// ret i32 %retval
2902	/// @endcode
2903	///
2904	/// =>
2905	///
2906	/// @code
2907	/// bb0:
2908	/// %tmp0 = tail call i32 @f0()
2909	/// ret i32 %tmp0
2910	/// bb1:
2911	/// %tmp1 = tail call i32 @f1()
2912	/// ret i32 %tmp1
2913	/// bb2:
2914	/// %tmp2 = tail call i32 @f2()
2915	/// ret i32 %tmp2
2916	/// @endcode
2917	bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2918	ModifyDT &ModifiedDT) {
2919	if (!BB->getTerminator())
2920	return false;
2921
2922	ReturnInst *RetI = dyn_cast<ReturnInst>(Val: BB->getTerminator());
2923	if (!RetI)
2924	return false;
2925
2926	assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2927
2928	PHINode PN = nullptr*;
2929	ExtractValueInst EVI = nullptr*;
2930	BitCastInst BCI = nullptr*;
2931	Value *V = RetI->getReturnValue();
2932	if (V) {
2933	BCI = dyn_cast<BitCastInst>(Val: V);
2934	if (BCI)
2935	V = BCI->getOperand(i_nocapture: `0`);
2936
2937	EVI = dyn_cast<ExtractValueInst>(Val: V);
2938	if (EVI) {
2939	V = EVI->getOperand(i_nocapture: `0`);
2940	if (!llvm::all_of(Range: EVI->indices(), P: [](unsigned idx) { return idx == `0`; }))
2941	return false;
2942	}
2943
2944	PN = dyn_cast<PHINode>(Val: V);
2945	}
2946
2947	if (PN && PN->getParent() != BB)
2948	return false;
2949
2950	auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2951	const BitCastInst *BC = dyn_cast<BitCastInst>(Val: Inst);
2952	if (BC && BC->hasOneUse())
2953	Inst = BC->user_back();
2954
2955	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2956	return II->getIntrinsicID() == Intrinsic::lifetime_end;
2957	return false;
2958	};
2959
2960	SmallVector<const IntrinsicInst *, `4`> FakeUses;
2961
2962	auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2963	if (auto *II = dyn_cast<IntrinsicInst>(Val: Inst);
2964	II && II->getIntrinsicID() == Intrinsic::fake_use) {
2965	// Record the instruction so it can be preserved when the exit block is
2966	// removed. Do not preserve the fake use that uses the result of the
2967	// PHI instruction.
2968	// Do not copy fake uses that use the result of a PHI node.
2969	// FIXME: If we do want to copy the fake use into the return blocks, we
2970	// have to figure out which of the PHI node operands to use for each
2971	// copy.
2972	if (!isa<PHINode>(Val: II->getOperand(i_nocapture: `0`))) {
2973	FakeUses.push_back(Elt: II);
2974	}
2975	return true;
2976	}
2977
2978	return false;
2979	};
2980
2981	// Make sure there are no instructions between the first instruction
2982	// and return.
2983	BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
2984	// Skip over debug and the bitcast.
2985	while (isa<DbgInfoIntrinsic>(Val: BI) \|\| &BI == BCI \|\| &BI == EVI \|\|
2986	isa<PseudoProbeInst>(Val: BI) \|\| isLifetimeEndOrBitCastFor(&*BI) \|\|
2987	isFakeUse (&*BI))
2988	BI = std::next(x: BI);
2989	if (&*BI != RetI)
2990	return false;
2991
2992	/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2993	/// call.
2994	const Function *F = BB->getParent();
2995	SmallVector<BasicBlock *, `4`> TailCallBBs;
2996	// Record the call instructions so we can insert any fake uses
2997	// that need to be preserved before them.
2998	SmallVector<CallInst *, `4`> CallInsts;
2999	if (PN) {
3000	for (unsigned I = `0`, E = PN->getNumIncomingValues(); I != E; ++I) {
3001	// Look through bitcasts.
3002	Value *IncomingVal = PN->getIncomingValue(i: I)->stripPointerCasts();
3003	CallInst *CI = dyn_cast<CallInst>(Val: IncomingVal);
3004	BasicBlock *PredBB = PN->getIncomingBlock(i: I);
3005	// Make sure the phi value is indeed produced by the tail call.
3006	if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3007	TLI->mayBeEmittedAsTailCall(CI) &&
3008	attributesPermitTailCall(F, I: CI, Ret: RetI, TLI: *TLI)) {
3009	TailCallBBs.push_back(Elt: PredBB);
3010	CallInsts.push_back(Elt: CI);
3011	} else {
3012	// Consider the cases in which the phi value is indirectly produced by
3013	// the tail call, for example when encountering memset(), memmove(),
3014	// strcpy(), whose return value may have been optimized out. In such
3015	// cases, the value needs to be the first function argument.
3016	//
3017	// bb0:
3018	// tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3019	// br label %return
3020	// return:
3021	// %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3022	if (PredBB && PredBB->getSingleSuccessor() == BB)
3023	CI = dyn_cast_or_null<CallInst>(
3024	Val: PredBB->getTerminator()->getPrevNonDebugInstruction(SkipPseudoOp: true));
3025
3026	if (CI && CI->use_empty() &&
3027	isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3028	IncomingVal == CI->getArgOperand(i: `0`) &&
3029	TLI->mayBeEmittedAsTailCall(CI) &&
3030	attributesPermitTailCall(F, I: CI, Ret: RetI, TLI: *TLI)) {
3031	TailCallBBs.push_back(Elt: PredBB);
3032	CallInsts.push_back(Elt: CI);
3033	}
3034	}
3035	}
3036	} else {
3037	SmallPtrSet<BasicBlock *, `4`> VisitedBBs;
3038	for (BasicBlock *Pred : predecessors(BB)) {
3039	if (!VisitedBBs.insert(Ptr: Pred).second)
3040	continue;
3041	if (Instruction I = Pred->rbegin()->getPrevNonDebugInstruction(SkipPseudoOp: true*)) {
3042	CallInst *CI = dyn_cast<CallInst>(Val: I);
3043	if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
3044	attributesPermitTailCall(F, I: CI, Ret: RetI, TLI: *TLI)) {
3045	// Either we return void or the return value must be the first
3046	// argument of a known intrinsic or library function.
3047	if (!V \|\| isa<UndefValue>(Val: V) \|\|
3048	(isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3049	V == CI->getArgOperand(i: `0`))) {
3050	TailCallBBs.push_back(Elt: Pred);
3051	CallInsts.push_back(Elt: CI);
3052	}
3053	}
3054	}
3055	}
3056	}
3057
3058	bool Changed = false;
3059	for (auto const &TailCallBB : TailCallBBs) {
3060	// Make sure the call instruction is followed by an unconditional branch to
3061	// the return block.
3062	BranchInst *BI = dyn_cast<BranchInst>(Val: TailCallBB->getTerminator());
3063	if (!BI \|\| !BI->isUnconditional() \|\| BI->getSuccessor(i: `0`) != BB)
3064	continue;
3065
3066	// Duplicate the return into TailCallBB.
3067	(void)FoldReturnIntoUncondBranch(RI: RetI, BB, Pred: TailCallBB);
3068	assert(!VerifyBFIUpdates \|\|
3069	BFI ->getBlockFreq(BB) >= BFI ->getBlockFreq(TailCallBB));
3070	BFI ->setBlockFreq(BB,
3071	Freq: (BFI ->getBlockFreq(BB) - BFI ->getBlockFreq(BB: TailCallBB)));
3072	ModifiedDT = ModifyDT::ModifyBBDT;
3073	Changed = true;
3074	++NumRetsDup;
3075	}
3076
3077	// If we eliminated all predecessors of the block, delete the block now.
3078	if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3079	// Copy the fake uses found in the original return block to all blocks
3080	// that contain tail calls.
3081	for (auto *CI : CallInsts) {
3082	for (auto const *FakeUse : FakeUses) {
3083	auto *ClonedInst = FakeUse->clone();
3084	ClonedInst->insertBefore(InsertPos: CI->getIterator());
3085	}
3086	}
3087	BB->eraseFromParent();
3088	}
3089
3090	return Changed;
3091	}
3092
3093	//===----------------------------------------------------------------------===//
3094	// Memory Optimization
3095	//===----------------------------------------------------------------------===//
3096
3097	namespace {
3098
3099	/// This is an extended version of TargetLowering::AddrMode
3100	/// which holds actual Value's for register values.*
3101	struct ExtAddrMode : public TargetLowering::AddrMode {
3102	Value BaseReg = nullptr*;
3103	Value ScaledReg = nullptr*;
3104	Value OriginalValue = nullptr*;
3105	bool InBounds = true;
3106
3107	enum FieldName {
3108	NoField = `0x00`,
3109	BaseRegField = `0x01`,
3110	BaseGVField = `0x02`,
3111	BaseOffsField = `0x04`,
3112	ScaledRegField = `0x08`,
3113	ScaleField = `0x10`,
3114	MultipleFields = `0xff`
3115	};
3116
3117	ExtAddrMode() = default;
3118
3119	void print(raw_ostream &OS) const;
3120	void dump() const;
3121
3122	// Replace From in ExtAddrMode with To.
3123	// E.g., SExt insts may be promoted and deleted. We should replace them with
3124	// the promoted values.
3125	void replaceWith(Value From, Value To) {
3126	if (ScaledReg == From)
3127	ScaledReg = To;
3128	}
3129
3130	FieldName compare(const ExtAddrMode &other) {
3131	// First check that the types are the same on each field, as differing types
3132	// is something we can't cope with later on.
3133	if (BaseReg && other.BaseReg &&
3134	BaseReg->getType() != other.BaseReg->getType())
3135	return MultipleFields;
3136	if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3137	return MultipleFields;
3138	if (ScaledReg && other.ScaledReg &&
3139	ScaledReg->getType() != other.ScaledReg->getType())
3140	return MultipleFields;
3141
3142	// Conservatively reject 'inbounds' mismatches.
3143	if (InBounds != other.InBounds)
3144	return MultipleFields;
3145
3146	// Check each field to see if it differs.
3147	unsigned Result = NoField;
3148	if (BaseReg != other.BaseReg)
3149	Result \|= BaseRegField;
3150	if (BaseGV != other.BaseGV)
3151	Result \|= BaseGVField;
3152	if (BaseOffs != other.BaseOffs)
3153	Result \|= BaseOffsField;
3154	if (ScaledReg != other.ScaledReg)
3155	Result \|= ScaledRegField;
3156	// Don't count 0 as being a different scale, because that actually means
3157	// unscaled (which will already be counted by having no ScaledReg).
3158	if (Scale && other.Scale && Scale != other.Scale)
3159	Result \|= ScaleField;
3160
3161	if (llvm::popcount(Value: Result) > `1`)
3162	return MultipleFields;
3163	else
3164	return static_cast<FieldName>(Result);
3165	}
3166
3167	// An AddrMode is trivial if it involves no calculation i.e. it is just a base
3168	// with no offset.
3169	bool isTrivial() {
3170	// An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg Scale) so it is*
3171	// trivial if at most one of these terms is nonzero, except that BaseGV and
3172	// BaseReg both being zero actually means a null pointer value, which we
3173	// consider to be 'non-zero' here.
3174	return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3175	}
3176
3177	Value GetFieldAsValue(FieldName Field, Type IntPtrTy) {
3178	switch (Field) {
3179	default:
3180	return nullptr;
3181	case BaseRegField:
3182	return BaseReg;
3183	case BaseGVField:
3184	return BaseGV;
3185	case ScaledRegField:
3186	return ScaledReg;
3187	case BaseOffsField:
3188	return ConstantInt::get(Ty: IntPtrTy, V: BaseOffs);
3189	}
3190	}
3191
3192	void SetCombinedField(FieldName Field, Value *V,
3193	const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3194	switch (Field) {
3195	default:
3196	llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3197	break;
3198	case ExtAddrMode::BaseRegField:
3199	BaseReg = V;
3200	break;
3201	case ExtAddrMode::BaseGVField:
3202	// A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3203	// in the BaseReg field.
3204	assert(BaseReg == nullptr);
3205	BaseReg = V;
3206	BaseGV = nullptr;
3207	break;
3208	case ExtAddrMode::ScaledRegField:
3209	ScaledReg = V;
3210	// If we have a mix of scaled and unscaled addrmodes then we want scale
3211	// to be the scale and not zero.
3212	if (!Scale)
3213	for (const ExtAddrMode &AM : AddrModes)
3214	if (AM.Scale) {
3215	Scale = AM.Scale;
3216	break;
3217	}
3218	break;
3219	case ExtAddrMode::BaseOffsField:
3220	// The offset is no longer a constant, so it goes in ScaledReg with a
3221	// scale of 1.
3222	assert(ScaledReg == nullptr);
3223	ScaledReg = V;
3224	Scale = `1`;
3225	BaseOffs = `0`;
3226	break;
3227	}
3228	}
3229	};
3230
#ifndef NDEBUG
/// Stream an ExtAddrMode by forwarding to ExtAddrMode::print; only available
/// when NDEBUG is not defined.
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}
#endif
3237
3238	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
3239	void ExtAddrMode::print(raw_ostream &OS) const {
3240	bool NeedPlus = false;
3241	OS << "[";
3242	if (InBounds)
3243	OS << "inbounds ";
3244	if (BaseGV) {
3245	OS << "GV:";
3246	BaseGV->printAsOperand(O&: OS, /PrintType=/false);
3247	NeedPlus = true;
3248	}
3249
3250	if (BaseOffs) {
3251	OS << (NeedPlus ? " + " : "") << BaseOffs;
3252	NeedPlus = true;
3253	}
3254
3255	if (BaseReg) {
3256	OS << (NeedPlus ? " + " : "") << "Base:";
3257	BaseReg->printAsOperand(O&: OS, /PrintType=/false);
3258	NeedPlus = true;
3259	}
3260	if (Scale) {
3261	OS << (NeedPlus ? " + " : "") << Scale << "*";
3262	ScaledReg->printAsOperand(O&: OS, /PrintType=/false);
3263	}
3264
3265	OS << `']'`;
3266	}
3267
3268	LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3269	print(OS&: dbgs());
3270	dbgs() << `'\n'`;
3271	}
3272	#endif
3273
3274	} // end anonymous namespace
3275
3276	namespace {
3277
3278	/// This class provides transaction based operation on the IR.
3279	/// Every change made through this class is recorded in the internal state and
3280	/// can be undone (rollback) until commit is called.
3281	/// CGP does not check if instructions could be speculatively executed when
3282	/// moved. Preserving the original location would pessimize the debugging
3283	/// experience, as well as negatively impact the quality of sample PGO.
3284	class TypePromotionTransaction {
3285	/// This represents the common interface of the individual transaction.
3286	/// Each class implements the logic for doing one specific modification on
3287	/// the IR via the TypePromotionTransaction.
3288	class TypePromotionAction {
3289	protected:
3290	/// The Instruction modified.
3291	Instruction *Inst;
3292
3293	public:
3294	/// Constructor of the action.
3295	/// The constructor performs the related action on the IR.
3296	TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3297
3298	virtual ~TypePromotionAction() = default;
3299
3300	/// Undo the modification done by this action.
3301	/// When this method is called, the IR must be in the same state as it was
3302	/// before this action was applied.
3303	/// \pre Undoing the action works if and only if the IR is in the exact same
3304	/// state as it was directly after this action was applied.
3305	virtual void undo() = `0`;
3306
3307	/// Advocate every change made by this action.
3308	/// When the results on the IR of the action are to be kept, it is important
3309	/// to call this function, otherwise hidden information may be kept forever.
3310	virtual void commit() {
3311	// Nothing to be done, this action is not doing anything.
3312	}
3313	};
3314
3315	/// Utility to remember the position of an instruction.
3316	class InsertionHandler {
3317	/// Position of an instruction.
3318	/// Either an instruction:
3319	/// - Is the first in a basic block: BB is used.
3320	/// - Has a previous instruction: PrevInst is used.
3321	struct {
3322	BasicBlock::iterator PrevInst;
3323	BasicBlock *BB;
3324	} Point;
3325	std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3326
3327	/// Remember whether or not the instruction had a previous instruction.
3328	bool HasPrevInstruction;
3329
3330	public:
3331	/// Record the position of \p Inst.
3332	InsertionHandler(Instruction *Inst) {
3333	HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3334	BasicBlock *BB = Inst->getParent();
3335
3336	// Record where we would have to re-insert the instruction in the sequence
3337	// of DbgRecords, if we ended up reinserting.
3338	if (BB->IsNewDbgInfoFormat)
3339	BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3340
3341	if (HasPrevInstruction) {
3342	Point.PrevInst = std::prev(x: Inst->getIterator());
3343	} else {
3344	Point.BB = BB;
3345	}
3346	}
3347
3348	/// Insert \p Inst at the recorded position.
3349	void insert(Instruction *Inst) {
3350	if (HasPrevInstruction) {
3351	if (Inst->getParent())
3352	Inst->removeFromParent();
3353	Inst->insertAfter(InsertPos: Point.PrevInst);
3354	} else {
3355	BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3356	if (Inst->getParent())
3357	Inst->moveBefore(BB&: *Point.BB, I: Position);
3358	else
3359	Inst->insertBefore(BB&: *Point.BB, InsertPos: Position);
3360	}
3361
3362	Inst->getParent()->reinsertInstInDbgRecords(I: Inst, Pos: BeforeDbgRecord);
3363	}
3364	};
3365
3366	/// Move an instruction before another.
3367	class InstructionMoveBefore : public TypePromotionAction {
3368	/// Original position of the instruction.
3369	InsertionHandler Position;
3370
3371	public:
3372	/// Move \p Inst before \p Before.
3373	InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3374	: TypePromotionAction (Inst), Position (Inst) {
3375	LLVM_DEBUG(dbgs() << "Do: move: " << Inst << "\nbefore: " << Before
3376	<< "\n");
3377	Inst->moveBefore(InsertPos: Before);
3378	}
3379
3380	/// Move the instruction back to its original position.
3381	void undo() override {
3382	LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3383	Position.insert(Inst);
3384	}
3385	};
3386
3387	/// Set the operand of an instruction with a new value.
3388	class OperandSetter : public TypePromotionAction {
3389	/// Original operand of the instruction.
3390	Value *Origin;
3391
3392	/// Index of the modified instruction.
3393	unsigned Idx;
3394
3395	public:
3396	/// Set \p Idx operand of \p Inst with \p NewVal.
3397	OperandSetter(Instruction Inst, unsigned* Idx, Value *NewVal)
3398	: TypePromotionAction (Inst), Idx(Idx) {
3399	LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3400	<< "for:" << *Inst << "\n"
3401	<< "with:" << *NewVal << "\n");
3402	Origin = Inst->getOperand(i: Idx);
3403	Inst->setOperand(i: Idx, Val: NewVal);
3404	}
3405
3406	/// Restore the original value of the instruction.
3407	void undo() override {
3408	LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3409	<< "for: " << *Inst << "\n"
3410	<< "with: " << *Origin << "\n");
3411	Inst->setOperand(i: Idx, Val: Origin);
3412	}
3413	};
3414
3415	/// Hide the operands of an instruction.
3416	/// Do as if this instruction was not using any of its operands.
3417	class OperandsHider : public TypePromotionAction {
3418	/// The list of original operands.
3419	SmallVector<Value *, `4`> OriginalValues;
3420
3421	public:
3422	/// Remove \p Inst from the uses of the operands of \p Inst.
3423	OperandsHider(Instruction *Inst) : TypePromotionAction (Inst) {
3424	LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3425	unsigned NumOpnds = Inst->getNumOperands();
3426	OriginalValues.reserve(N: NumOpnds);
3427	for (unsigned It = `0`; It < NumOpnds; ++It) {
3428	// Save the current operand.
3429	Value *Val = Inst->getOperand(i: It);
3430	OriginalValues.push_back(Elt: Val);
3431	// Set a dummy one.
3432	// We could use OperandSetter here, but that would imply an overhead
3433	// that we are not willing to pay.
3434	Inst->setOperand(i: It, Val: PoisonValue::get(T: Val->getType()));
3435	}
3436	}
3437
3438	/// Restore the original list of uses.
3439	void undo() override {
3440	LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3441	for (unsigned It = `0`, EndIt = OriginalValues.size(); It != EndIt; ++It)
3442	Inst->setOperand(i: It, Val: OriginalValues [It]);
3443	}
3444	};
3445
3446	/// Build a truncate instruction.
3447	class TruncBuilder : public TypePromotionAction {
3448	Value *Val;
3449
3450	public:
3451	/// Build a truncate instruction of \p Opnd producing a \p Ty
3452	/// result.
3453	/// trunc Opnd to Ty.
3454	TruncBuilder(Instruction Opnd, Type Ty) : TypePromotionAction (Opnd) {
3455	IRBuilder<> Builder(Opnd);
3456	Builder.SetCurrentDebugLocation(DebugLoc ());
3457	Val = Builder.CreateTrunc(V: Opnd, DestTy: Ty, Name: "promoted");
3458	LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3459	}
3460
3461	/// Get the built value.
3462	Value getBuiltValue() { return* Val; }
3463
3464	/// Remove the built instruction.
3465	void undo() override {
3466	LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3467	if (Instruction *IVal = dyn_cast<Instruction>(Val))
3468	IVal->eraseFromParent();
3469	}
3470	};
3471
3472	/// Build a sign extension instruction.
3473	class SExtBuilder : public TypePromotionAction {
3474	Value *Val;
3475
3476	public:
3477	/// Build a sign extension instruction of \p Opnd producing a \p Ty
3478	/// result.
3479	/// sext Opnd to Ty.
3480	SExtBuilder(Instruction InsertPt, Value Opnd, Type *Ty)
3481	: TypePromotionAction (InsertPt) {
3482	IRBuilder<> Builder(InsertPt);
3483	Val = Builder.CreateSExt(V: Opnd, DestTy: Ty, Name: "promoted");
3484	LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3485	}
3486
3487	/// Get the built value.
3488	Value getBuiltValue() { return* Val; }
3489
3490	/// Remove the built instruction.
3491	void undo() override {
3492	LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3493	if (Instruction *IVal = dyn_cast<Instruction>(Val))
3494	IVal->eraseFromParent();
3495	}
3496	};
3497
3498	/// Build a zero extension instruction.
3499	class ZExtBuilder : public TypePromotionAction {
3500	Value *Val;
3501
3502	public:
3503	/// Build a zero extension instruction of \p Opnd producing a \p Ty
3504	/// result.
3505	/// zext Opnd to Ty.
3506	ZExtBuilder(Instruction InsertPt, Value Opnd, Type *Ty)
3507	: TypePromotionAction (InsertPt) {
3508	IRBuilder<> Builder(InsertPt);
3509	Builder.SetCurrentDebugLocation(DebugLoc ());
3510	Val = Builder.CreateZExt(V: Opnd, DestTy: Ty, Name: "promoted");
3511	LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3512	}
3513
3514	/// Get the built value.
3515	Value getBuiltValue() { return* Val; }
3516
3517	/// Remove the built instruction.
3518	void undo() override {
3519	LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3520	if (Instruction *IVal = dyn_cast<Instruction>(Val))
3521	IVal->eraseFromParent();
3522	}
3523	};
3524
3525	/// Mutate an instruction to another type.
3526	class TypeMutator : public TypePromotionAction {
3527	/// Record the original type.
3528	Type *OrigTy;
3529
3530	public:
3531	/// Mutate the type of \p Inst into \p NewTy.
3532	TypeMutator(Instruction Inst, Type NewTy)
3533	: TypePromotionAction (Inst), OrigTy(Inst->getType()) {
3534	LLVM_DEBUG(dbgs() << "Do: MutateType: " << Inst << " with " << NewTy
3535	<< "\n");
3536	Inst->mutateType(Ty: NewTy);
3537	}
3538
3539	/// Mutate the instruction back to its original type.
3540	void undo() override {
3541	LLVM_DEBUG(dbgs() << "Undo: MutateType: " << Inst << " with " << OrigTy
3542	<< "\n");
3543	Inst->mutateType(Ty: OrigTy);
3544	}
3545	};
3546
3547	/// Replace the uses of an instruction by another instruction.
3548	class UsesReplacer : public TypePromotionAction {
3549	/// Helper structure to keep track of the replaced uses.
3550	struct InstructionAndIdx {
3551	/// The instruction using the instruction.
3552	Instruction *Inst;
3553
3554	/// The index where this instruction is used for Inst.
3555	unsigned Idx;
3556
3557	InstructionAndIdx(Instruction Inst, unsigned* Idx)
3558	: Inst(Inst), Idx(Idx) {}
3559	};
3560
3561	/// Keep track of the original uses (pair Instruction, Index).
3562	SmallVector<InstructionAndIdx, `4`> OriginalUses;
3563	/// Keep track of the debug users.
3564	SmallVector<DbgValueInst *, `1`> DbgValues;
3565	/// And non-instruction debug-users too.
3566	SmallVector<DbgVariableRecord *, `1`> DbgVariableRecords;
3567
3568	/// Keep track of the new value so that we can undo it by replacing
3569	/// instances of the new value with the original value.
3570	Value *New;
3571
3572	using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
3573
3574	public:
3575	/// Replace all the use of \p Inst by \p New.
3576	UsesReplacer(Instruction Inst, Value New)
3577	: TypePromotionAction (Inst), New(New) {
3578	LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << Inst << " with " << New
3579	<< "\n");
3580	// Record the original uses.
3581	for (Use &U : Inst->uses()) {
3582	Instruction *UserI = cast<Instruction>(Val: U.getUser());
3583	OriginalUses.push_back(Elt: InstructionAndIdx (UserI, U.getOperandNo()));
3584	}
3585	// Record the debug uses separately. They are not in the instruction's
3586	// use list, but they are replaced by RAUW.
3587	findDbgValues(DbgValues, V: Inst, DbgVariableRecords: &DbgVariableRecords);
3588
3589	// Now, we can replace the uses.
3590	Inst->replaceAllUsesWith(V: New);
3591	}
3592
3593	/// Reassign the original uses of Inst to Inst.
3594	void undo() override {
3595	LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3596	for (InstructionAndIdx &Use : OriginalUses)
3597	Use.Inst->setOperand(i: Use.Idx, Val: Inst);
3598	// RAUW has replaced all original uses with references to the new value,
3599	// including the debug uses. Since we are undoing the replacements,
3600	// the original debug uses must also be reinstated to maintain the
3601	// correctness and utility of debug value instructions.
3602	for (auto *DVI : DbgValues)
3603	DVI->replaceVariableLocationOp(OldValue: New, NewValue: Inst);
3604	// Similar story with DbgVariableRecords, the non-instruction
3605	// representation of dbg.values.
3606	for (DbgVariableRecord *DVR : DbgVariableRecords)
3607	DVR->replaceVariableLocationOp(OldValue: New, NewValue: Inst);
3608	}
3609	};
3610
3611	/// Remove an instruction from the IR.
3612	class InstructionRemover : public TypePromotionAction {
3613	/// Original position of the instruction.
3614	InsertionHandler Inserter;
3615
3616	/// Helper structure to hide all the link to the instruction. In other
3617	/// words, this helps to do as if the instruction was removed.
3618	OperandsHider Hider;
3619
3620	/// Keep track of the uses replaced, if any.
3621	UsesReplacer Replacer = nullptr*;
3622
3623	/// Keep track of instructions removed.
3624	SetOfInstrs &RemovedInsts;
3625
3626	public:
3627	/// Remove all reference of \p Inst and optionally replace all its
3628	/// uses with New.
3629	/// \p RemovedInsts Keep track of the instructions removed by this Action.
3630	/// \pre If !Inst->use_empty(), then New != nullptr
3631	InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3632	Value New = nullptr*)
3633	: TypePromotionAction (Inst), Inserter (Inst), Hider (Inst),
3634	RemovedInsts(RemovedInsts) {
3635	if (New)
3636	Replacer = new UsesReplacer (Inst, New);
3637	LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3638	RemovedInsts.insert(Ptr: Inst);
3639	/// The instructions removed here will be freed after completing
3640	/// optimizeBlock() for all blocks as we need to keep track of the
3641	/// removed instructions during promotion.
3642	Inst->removeFromParent();
3643	}
3644
3645	~InstructionRemover() override { delete Replacer; }
3646
3647	InstructionRemover &operator=(const InstructionRemover &other) = delete;
3648	InstructionRemover(const InstructionRemover &other) = delete;
3649
3650	/// Resurrect the instruction and reassign it to the proper uses if
3651	/// new value was provided when build this action.
3652	void undo() override {
3653	LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3654	Inserter.insert(Inst);
3655	if (Replacer)
3656	Replacer->undo();
3657	Hider.undo();
3658	RemovedInsts.erase(Ptr: Inst);
3659	}
3660	};
3661
3662	public:
3663	/// Restoration point.
3664	/// The restoration point is a pointer to an action instead of an iterator
3665	/// because the iterator may be invalidated but not the pointer.
3666	using ConstRestorationPt = const TypePromotionAction *;
3667
3668	TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3669	: RemovedInsts(RemovedInsts) {}
3670
3671	/// Advocate every changes made in that transaction. Return true if any change
3672	/// happen.
3673	bool commit();
3674
3675	/// Undo all the changes made after the given point.
3676	void rollback(ConstRestorationPt Point);
3677
3678	/// Get the current restoration point.
3679	ConstRestorationPt getRestorationPoint() const;
3680
3681	/// \name API for IR modification with state keeping to support rollback.
3682	/// @{
3683	/// Same as Instruction::setOperand.
3684	void setOperand(Instruction Inst, unsigned* Idx, Value *NewVal);
3685
3686	/// Same as Instruction::eraseFromParent.
3687	void eraseInstruction(Instruction Inst, Value NewVal = nullptr);
3688
3689	/// Same as Value::replaceAllUsesWith.
3690	void replaceAllUsesWith(Instruction Inst, Value New);
3691
3692	/// Same as Value::mutateType.
3693	void mutateType(Instruction Inst, Type NewTy);
3694
3695	/// Same as IRBuilder::createTrunc.
3696	Value createTrunc(Instruction Opnd, Type *Ty);
3697
3698	/// Same as IRBuilder::createSExt.
3699	Value createSExt(Instruction Inst, Value Opnd, Type Ty);
3700
3701	/// Same as IRBuilder::createZExt.
3702	Value createZExt(Instruction Inst, Value Opnd, Type Ty);
3703
3704	private:
3705	/// The ordered list of actions made so far.
3706	SmallVector<std::unique_ptr<TypePromotionAction>, `16`> Actions;
3707
3708	using CommitPt =
3709	SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3710
3711	SetOfInstrs &RemovedInsts;
3712	};
3713
3714	} // end anonymous namespace
3715
3716	void TypePromotionTransaction::setOperand(Instruction Inst, unsigned* Idx,
3717	Value *NewVal) {
3718	Actions.push_back(Elt: std::make_unique<TypePromotionTransaction::OperandSetter>(
3719	args&: Inst, args&: Idx, args&: NewVal));
3720	}
3721
3722	void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3723	Value *NewVal) {
3724	Actions.push_back(
3725	Elt: std::make_unique<TypePromotionTransaction::InstructionRemover>(
3726	args&: Inst, args&: RemovedInsts, args&: NewVal));
3727	}
3728
3729	void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3730	Value *New) {
3731	Actions.push_back(
3732	Elt: std::make_unique<TypePromotionTransaction::UsesReplacer>(args&: Inst, args&: New));
3733	}
3734
3735	void TypePromotionTransaction::mutateType(Instruction Inst, Type NewTy) {
3736	Actions.push_back(
3737	Elt: std::make_unique<TypePromotionTransaction::TypeMutator>(args&: Inst, args&: NewTy));
3738	}
3739
3740	Value TypePromotionTransaction::createTrunc(Instruction Opnd, Type *Ty) {
3741	std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder (Opnd, Ty));
3742	Value *Val = Ptr ->getBuiltValue();
3743	Actions.push_back(Elt: std::move(Ptr));
3744	return Val;
3745	}
3746
3747	Value TypePromotionTransaction::createSExt(Instruction Inst, Value *Opnd,
3748	Type *Ty) {
3749	std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder (Inst, Opnd, Ty));
3750	Value *Val = Ptr ->getBuiltValue();
3751	Actions.push_back(Elt: std::move(Ptr));
3752	return Val;
3753	}
3754
3755	Value TypePromotionTransaction::createZExt(Instruction Inst, Value *Opnd,
3756	Type *Ty) {
3757	std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder (Inst, Opnd, Ty));
3758	Value *Val = Ptr ->getBuiltValue();
3759	Actions.push_back(Elt: std::move(Ptr));
3760	return Val;
3761	}
3762
3763	TypePromotionTransaction::ConstRestorationPt
3764	TypePromotionTransaction::getRestorationPoint() const {
3765	return !Actions.empty() ? Actions.back().get() : nullptr;
3766	}
3767
3768	bool TypePromotionTransaction::commit() {
3769	for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3770	Action ->commit();
3771	bool Modified = !Actions.empty();
3772	Actions.clear();
3773	return Modified;
3774	}
3775
3776	void TypePromotionTransaction::rollback(
3777	TypePromotionTransaction::ConstRestorationPt Point) {
3778	while (!Actions.empty() && Point != Actions.back().get()) {
3779	std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3780	Curr ->undo();
3781	}
3782	}
3783
3784	namespace {
3785
3786	/// A helper class for matching addressing modes.
3787	///
3788	/// This encapsulates the logic for matching the target-legal addressing modes.
3789	class AddressingModeMatcher {
3790	SmallVectorImpl<Instruction *> &AddrModeInsts;
3791	const TargetLowering &TLI;
3792	const TargetRegisterInfo &TRI;
3793	const DataLayout &DL;
3794	const LoopInfo &LI;
3795	const std::function<const DominatorTree &()> getDTFn;
3796
3797	/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3798	/// the memory instruction that we're computing this address for.
3799	Type *AccessTy;
3800	unsigned AddrSpace;
3801	Instruction *MemoryInst;
3802
3803	/// This is the addressing mode that we're building up. This is
3804	/// part of the return value of this addressing mode matching stuff.
3805	ExtAddrMode &AddrMode;
3806
3807	/// The instructions inserted by other CodeGenPrepare optimizations.
3808	const SetOfInstrs &InsertedInsts;
3809
3810	/// A map from the instructions to their type before promotion.
3811	InstrToOrigTy &PromotedInsts;
3812
3813	/// The ongoing transaction where every action should be registered.
3814	TypePromotionTransaction &TPT;
3815
3816	// A GEP which has too large offset to be folded into the addressing mode.
3817	std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3818
3819	/// This is set to true when we should not do profitability checks.
3820	/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3821	bool IgnoreProfitability;
3822
3823	/// True if we are optimizing for size.
3824	bool OptSize = false;
3825
3826	ProfileSummaryInfo *PSI;
3827	BlockFrequencyInfo *BFI;
3828
3829	AddressingModeMatcher(
3830	SmallVectorImpl<Instruction > &AMI, const* TargetLowering &TLI,
3831	const TargetRegisterInfo &TRI, const LoopInfo &LI,
3832	const std::function<const DominatorTree &()> getDTFn, Type *AT,
3833	unsigned AS, Instruction *MI, ExtAddrMode &AM,
3834	const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3835	TypePromotionTransaction &TPT,
3836	std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3837	bool OptSize, ProfileSummaryInfo PSI, BlockFrequencyInfo BFI)
3838	: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3839	DL(MI->getDataLayout()), LI(LI), getDTFn (getDTFn),
3840	AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3841	InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3842	LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3843	IgnoreProfitability = false;
3844	}
3845
3846	public:
3847	/// Find the maximal addressing mode that a load/store of V can fold,
3848	/// give an access type of AccessTy. This returns a list of involved
3849	/// instructions in AddrModeInsts.
3850	/// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3851	/// optimizations.
3852	/// \p PromotedInsts maps the instructions to their type before promotion.
3853	/// \p The ongoing transaction where every action should be registered.
3854	static ExtAddrMode
3855	Match(Value V, Type AccessTy, unsigned AS, Instruction *MemoryInst,
3856	SmallVectorImpl<Instruction *> &AddrModeInsts,
3857	const TargetLowering &TLI, const LoopInfo &LI,
3858	const std::function<const DominatorTree &()> getDTFn,
3859	const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3860	InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3861	std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3862	bool OptSize, ProfileSummaryInfo PSI, BlockFrequencyInfo BFI) {
3863	ExtAddrMode Result;
3864
3865	bool Success = AddressingModeMatcher (AddrModeInsts, TLI, TRI, LI, getDTFn,
3866	AccessTy, AS, MemoryInst, Result,
3867	InsertedInsts, PromotedInsts, TPT,
3868	LargeOffsetGEP, OptSize, PSI, BFI)
3869	.matchAddr(Addr: V, Depth: `0`);
3870	(void)Success;
3871	assert(Success && "Couldn't select anything?");
3872	return Result;
3873	}
3874
3875	private:
3876	bool matchScaledValue(Value ScaleReg, int64_t Scale, unsigned* Depth);
3877	bool matchAddr(Value Addr, unsigned* Depth);
3878	bool matchOperationAddr(User AddrInst, unsigned* Opcode, unsigned Depth,
3879	bool MovedAway = nullptr*);
3880	bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3881	ExtAddrMode &AMBefore,
3882	ExtAddrMode &AMAfter);
3883	bool valueAlreadyLiveAtInst(Value Val, Value KnownLive1, Value *KnownLive2);
3884	bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3885	Value PromotedOperand) const*;
3886	};
3887
3888	class PhiNodeSet;
3889
3890	/// An iterator for PhiNodeSet.
3891	class PhiNodeSetIterator {
3892	PhiNodeSet *const Set;
3893	size_t CurrentIndex = `0`;
3894
3895	public:
3896	/// The constructor. Start should point to either a valid element, or be equal
3897	/// to the size of the underlying SmallVector of the PhiNodeSet.
3898	PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3899	PHINode *operator() const*;
3900	PhiNodeSetIterator &operator++();
3901	bool operator==(const PhiNodeSetIterator &RHS) const;
3902	bool operator!=(const PhiNodeSetIterator &RHS) const;
3903	};
3904
3905	/// Keeps a set of PHINodes.
3906	///
3907	/// This is a minimal set implementation for a specific use case:
3908	/// It is very fast when there are very few elements, but also provides good
3909	/// performance when there are many. It is similar to SmallPtrSet, but also
3910	/// provides iteration by insertion order, which is deterministic and stable
3911	/// across runs. It is also similar to SmallSetVector, but provides removing
3912	/// elements in O(1) time. This is achieved by not actually removing the element
3913	/// from the underlying vector, so comes at the cost of using more memory, but
3914	/// that is fine, since PhiNodeSets are used as short lived objects.
3915	class PhiNodeSet {
3916	friend class PhiNodeSetIterator;
3917
3918	using MapType = SmallDenseMap<PHINode *, size_t, `32`>;
3919	using iterator = PhiNodeSetIterator;
3920
3921	/// Keeps the elements in the order of their insertion in the underlying
3922	/// vector. To achieve constant time removal, it never deletes any element.
3923	SmallVector<PHINode *, `32`> NodeList;
3924
3925	/// Keeps the elements in the underlying set implementation. This (and not the
3926	/// NodeList defined above) is the source of truth on whether an element
3927	/// is actually in the collection.
3928	MapType NodeMap;
3929
3930	/// Points to the first valid (not deleted) element when the set is not empty
3931	/// and the value is not zero. Equals to the size of the underlying vector
3932	/// when the set is empty. When the value is 0, as in the beginning, the
3933	/// first element may or may not be valid.
3934	size_t FirstValidElement = `0`;
3935
3936	public:
3937	/// Inserts a new element to the collection.
3938	/// \returns true if the element is actually added, i.e. was not in the
3939	/// collection before the operation.
3940	bool insert(PHINode *Ptr) {
3941	if (NodeMap.insert(KV: std::make_pair(x&: Ptr, y: NodeList.size())).second) {
3942	NodeList.push_back(Elt: Ptr);
3943	return true;
3944	}
3945	return false;
3946	}
3947
3948	/// Removes the element from the collection.
3949	/// \returns whether the element is actually removed, i.e. was in the
3950	/// collection before the operation.
3951	bool erase(PHINode *Ptr) {
3952	if (NodeMap.erase(Val: Ptr)) {
3953	SkipRemovedElements(CurrentIndex&: FirstValidElement);
3954	return true;
3955	}
3956	return false;
3957	}
3958
3959	/// Removes all elements and clears the collection.
3960	void clear() {
3961	NodeMap.clear();
3962	NodeList.clear();
3963	FirstValidElement = `0`;
3964	}
3965
3966	/// \returns an iterator that will iterate the elements in the order of
3967	/// insertion.
3968	iterator begin() {
3969	if (FirstValidElement == `0`)
3970	SkipRemovedElements(CurrentIndex&: FirstValidElement);
3971	return PhiNodeSetIterator (this, FirstValidElement);
3972	}
3973
3974	/// \returns an iterator that points to the end of the collection.
3975	iterator end() { return PhiNodeSetIterator (this, NodeList.size()); }
3976
3977	/// Returns the number of elements in the collection.
3978	size_t size() const { return NodeMap.size(); }
3979
3980	/// \returns 1 if the given element is in the collection, and 0 if otherwise.
3981	size_t count(PHINode Ptr) const* { return NodeMap.count(Val: Ptr); }
3982
3983	private:
3984	/// Updates the CurrentIndex so that it will point to a valid element.
3985	///
3986	/// If the element of NodeList at CurrentIndex is valid, it does not
3987	/// change it. If there are no more valid elements, it updates CurrentIndex
3988	/// to point to the end of the NodeList.
3989	void SkipRemovedElements(size_t &CurrentIndex) {
3990	while (CurrentIndex < NodeList.size()) {
3991	auto it = NodeMap.find(Val: NodeList [CurrentIndex]);
3992	// If the element has been deleted and added again later, NodeMap will
3993	// point to a different index, so CurrentIndex will still be invalid.
3994	if (it != NodeMap.end() && it ->second == CurrentIndex)
3995	break;
3996	++CurrentIndex;
3997	}
3998	}
3999	};
4000
4001	PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4002	: Set(Set), CurrentIndex(Start) {}
4003
4004	PHINode PhiNodeSetIterator::operator*() const* {
4005	assert(CurrentIndex < Set->NodeList.size() &&
4006	"PhiNodeSet access out of range");
4007	return Set->NodeList [CurrentIndex];
4008	}
4009
4010	PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4011	assert(CurrentIndex < Set->NodeList.size() &&
4012	"PhiNodeSet access out of range");
4013	++CurrentIndex;
4014	Set->SkipRemovedElements(CurrentIndex);
4015	return *this;
4016	}
4017
4018	bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4019	return CurrentIndex == RHS.CurrentIndex;
4020	}
4021
4022	bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4023	return !((*this) == RHS);
4024	}
4025
4026	/// Keep track of simplification of Phi nodes.
4027	/// Accept the set of all phi nodes and erase phi node from this set
4028	/// if it is simplified.
4029	class SimplificationTracker {
4030	DenseMap<Value , Value > Storage;
4031	const SimplifyQuery &SQ;
4032	// Tracks newly created Phi nodes. The elements are iterated by insertion
4033	// order.
4034	PhiNodeSet AllPhiNodes;
4035	// Tracks newly created Select nodes.
4036	SmallPtrSet<SelectInst *, `32`> AllSelectNodes;
4037
4038	public:
4039	SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
4040
4041	Value Get(Value V) {
4042	do {
4043	auto SV = Storage.find(Val: V);
4044	if (SV == Storage.end())
4045	return V;
4046	V = SV ->second;
4047	} while (true);
4048	}
4049
4050	Value Simplify(Value Val) {
4051	SmallVector<Value *, `32`> WorkList;
4052	SmallPtrSet<Value *, `32`> Visited;
4053	WorkList.push_back(Elt: Val);
4054	while (!WorkList.empty()) {
4055	auto *P = WorkList.pop_back_val();
4056	if (!Visited.insert(Ptr: P).second)
4057	continue;
4058	if (auto *PI = dyn_cast<Instruction>(Val: P))
4059	if (Value *V = simplifyInstruction(I: cast<Instruction>(Val: PI), Q: SQ)) {
4060	for (auto *U : PI->users())
4061	WorkList.push_back(Elt: cast<Value>(Val: U));
4062	Put(From: PI, To: V);
4063	PI->replaceAllUsesWith(V);
4064	if (auto *PHI = dyn_cast<PHINode>(Val: PI))
4065	AllPhiNodes.erase(Ptr: PHI);
4066	if (auto *Select = dyn_cast<SelectInst>(Val: PI))
4067	AllSelectNodes.erase(Ptr: Select);
4068	PI->eraseFromParent();
4069	}
4070	}
4071	return Get(V: Val);
4072	}
4073
4074	void Put(Value From, Value To) { Storage.insert(KV: {From, To}); }
4075
4076	void ReplacePhi(PHINode From, PHINode To) {
4077	Value *OldReplacement = Get(V: From);
4078	while (OldReplacement != From) {
4079	From = To;
4080	To = dyn_cast<PHINode>(Val: OldReplacement);
4081	OldReplacement = Get(V: From);
4082	}
4083	assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4084	Put(From, To);
4085	From->replaceAllUsesWith(V: To);
4086	AllPhiNodes.erase(Ptr: From);
4087	From->eraseFromParent();
4088	}
4089
4090	PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4091
4092	void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(Ptr: PN); }
4093
4094	void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(Ptr: SI); }
4095
4096	unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4097
4098	unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4099
4100	void destroyNewNodes(Type *CommonType) {
4101	// For safe erasing, replace the uses with dummy value first.
4102	auto *Dummy = PoisonValue::get(T: CommonType);
4103	for (auto *I : AllPhiNodes) {
4104	I->replaceAllUsesWith(V: Dummy);
4105	I->eraseFromParent();
4106	}
4107	AllPhiNodes.clear();
4108	for (auto *I : AllSelectNodes) {
4109	I->replaceAllUsesWith(V: Dummy);
4110	I->eraseFromParent();
4111	}
4112	AllSelectNodes.clear();
4113	}
4114	};
4115
4116	/// A helper class for combining addressing modes.
4117	class AddressingModeCombiner {
4118	typedef DenseMap<Value , Value > FoldAddrToValueMapping;
4119	typedef std::pair<PHINode , PHINode > PHIPair;
4120
4121	private:
4122	/// The addressing modes we've collected.
4123	SmallVector<ExtAddrMode, `16`> AddrModes;
4124
4125	/// The field in which the AddrModes differ, when we have more than one.
4126	ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4127
4128	/// Are the AddrModes that we have all just equal to their original values?
4129	bool AllAddrModesTrivial = true;
4130
4131	/// Common Type for all different fields in addressing modes.
4132	Type CommonType = nullptr*;
4133
4134	/// SimplifyQuery for simplifyInstruction utility.
4135	const SimplifyQuery &SQ;
4136
4137	/// Original Address.
4138	Value *Original;
4139
4140	/// Common value among addresses
4141	Value CommonValue = nullptr*;
4142
4143	public:
4144	AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4145	: SQ(_SQ), Original(OriginalValue) {}
4146
4147	~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4148
4149	/// Get the combined AddrMode
4150	const ExtAddrMode &getAddrMode() const { return AddrModes [`0`]; }
4151
4152	/// Add a new AddrMode if it's compatible with the AddrModes we already
4153	/// have.
4154	/// \return True iff we succeeded in doing so.
4155	bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4156	// Take note of if we have any non-trivial AddrModes, as we need to detect
4157	// when all AddrModes are trivial as then we would introduce a phi or select
4158	// which just duplicates what's already there.
4159	AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4160
4161	// If this is the first addrmode then everything is fine.
4162	if (AddrModes.empty()) {
4163	AddrModes.emplace_back(Args&: NewAddrMode);
4164	return true;
4165	}
4166
4167	// Figure out how different this is from the other address modes, which we
4168	// can do just by comparing against the first one given that we only care
4169	// about the cumulative difference.
4170	ExtAddrMode::FieldName ThisDifferentField =
4171	AddrModes [`0`].compare(other: NewAddrMode);
4172	if (DifferentField == ExtAddrMode::NoField)
4173	DifferentField = ThisDifferentField;
4174	else if (DifferentField != ThisDifferentField)
4175	DifferentField = ExtAddrMode::MultipleFields;
4176
4177	// If NewAddrMode differs in more than one dimension we cannot handle it.
4178	bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4179
4180	// If Scale Field is different then we reject.
4181	CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4182
4183	// We also must reject the case when base offset is different and
4184	// scale reg is not null, we cannot handle this case due to merge of
4185	// different offsets will be used as ScaleReg.
4186	CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField \|\|
4187	!NewAddrMode.ScaledReg);
4188
4189	// We also must reject the case when GV is different and BaseReg installed
4190	// due to we want to use base reg as a merge of GV values.
4191	CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField \|\|
4192	!NewAddrMode.HasBaseReg);
4193
4194	// Even if NewAddMode is the same we still need to collect it due to
4195	// original value is different. And later we will need all original values
4196	// as anchors during finding the common Phi node.
4197	if (CanHandle)
4198	AddrModes.emplace_back(Args&: NewAddrMode);
4199	else
4200	AddrModes.clear();
4201
4202	return CanHandle;
4203	}
4204
4205	/// Combine the addressing modes we've collected into a single
4206	/// addressing mode.
4207	/// \return True iff we successfully combined them or we only had one so
4208	/// didn't need to combine them anyway.
4209	bool combineAddrModes() {
4210	// If we have no AddrModes then they can't be combined.
4211	if (AddrModes.size() == `0`)
4212	return false;
4213
4214	// A single AddrMode can trivially be combined.
4215	if (AddrModes.size() == `1` \|\| DifferentField == ExtAddrMode::NoField)
4216	return true;
4217
4218	// If the AddrModes we collected are all just equal to the value they are
4219	// derived from then combining them wouldn't do anything useful.
4220	if (AllAddrModesTrivial)
4221	return false;
4222
4223	if (!addrModeCombiningAllowed())
4224	return false;
4225
4226	// Build a map between <original value, basic block where we saw it> to
4227	// value of base register.
4228	// Bail out if there is no common type.
4229	FoldAddrToValueMapping Map;
4230	if (!initializeMap(Map))
4231	return false;
4232
4233	CommonValue = findCommon(Map);
4234	if (CommonValue)
4235	AddrModes [`0`].SetCombinedField(Field: DifferentField, V: CommonValue, AddrModes);
4236	return CommonValue != nullptr;
4237	}
4238
4239	private:
4240	/// `CommonValue` may be a placeholder inserted by us.
4241	/// If the placeholder is not used, we should remove this dead instruction.
4242	void eraseCommonValueIfDead() {
4243	if (CommonValue && CommonValue->use_empty())
4244	if (Instruction *CommonInst = dyn_cast<Instruction>(Val: CommonValue))
4245	CommonInst->eraseFromParent();
4246	}
4247
4248	/// Initialize Map with anchor values. For address seen
4249	/// we set the value of different field saw in this address.
4250	/// At the same time we find a common type for different field we will
4251	/// use to create new Phi/Select nodes. Keep it in CommonType field.
4252	/// Return false if there is no common type found.
4253	bool initializeMap(FoldAddrToValueMapping &Map) {
4254	// Keep track of keys where the value is null. We will need to replace it
4255	// with constant null when we know the common type.
4256	SmallVector<Value *, `2`> NullValue;
4257	Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes [`0`].OriginalValue->getType());
4258	for (auto &AM : AddrModes) {
4259	Value *DV = AM.GetFieldAsValue(Field: DifferentField, IntPtrTy);
4260	if (DV) {
4261	auto *Type = DV->getType();
4262	if (CommonType && CommonType != Type)
4263	return false;
4264	CommonType = Type;
4265	Map [AM.OriginalValue] = DV;
4266	} else {
4267	NullValue.push_back(Elt: AM.OriginalValue);
4268	}
4269	}
4270	assert(CommonType && "At least one non-null value must be!");
4271	for (auto *V : NullValue)
4272	Map [V] = Constant::getNullValue(Ty: CommonType);
4273	return true;
4274	}
4275
4276	/// We have mapping between value A and other value B where B was a field in
4277	/// addressing mode represented by A. Also we have an original value C
4278	/// representing an address we start with. Traversing from C through phi and
4279	/// selects we ended up with A's in a map. This utility function tries to find
4280	/// a value V which is a field in addressing mode C and traversing through phi
4281	/// nodes and selects we will end up in corresponded values B in a map.
4282	/// The utility will create a new Phi/Selects if needed.
4283	// The simple example looks as follows:
4284	// BB1:
4285	// p1 = b1 + 40
4286	// br cond BB2, BB3
4287	// BB2:
4288	// p2 = b2 + 40
4289	// br BB3
4290	// BB3:
4291	// p = phi [p1, BB1], [p2, BB2]
4292	// v = load p
4293	// Map is
4294	// p1 -> b1
4295	// p2 -> b2
4296	// Request is
4297	// p -> ?
4298	// The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4299	Value *findCommon(FoldAddrToValueMapping &Map) {
4300	// Tracks the simplification of newly created phi nodes. The reason we use
4301	// this mapping is because we will add new created Phi nodes in AddrToBase.
4302	// Simplification of Phi nodes is recursive, so some Phi node may
4303	// be simplified after we added it to AddrToBase. In reality this
4304	// simplification is possible only if original phi/selects were not
4305	// simplified yet.
4306	// Using this mapping we can find the current value in AddrToBase.
4307	SimplificationTracker ST(SQ);
4308
4309	// First step, DFS to create PHI nodes for all intermediate blocks.
4310	// Also fill traverse order for the second step.
4311	SmallVector<Value *, `32`> TraverseOrder;
4312	InsertPlaceholders(Map, TraverseOrder, ST);
4313
4314	// Second Step, fill new nodes by merged values and simplify if possible.
4315	FillPlaceholders(Map, TraverseOrder, ST);
4316
4317	if (!AddrSinkNewSelects && ST.countNewSelectNodes() > `0`) {
4318	ST.destroyNewNodes(CommonType);
4319	return nullptr;
4320	}
4321
4322	// Now we'd like to match New Phi nodes to existed ones.
4323	unsigned PhiNotMatchedCount = `0`;
4324	if (!MatchPhiSet(ST, AllowNewPhiNodes: AddrSinkNewPhis, PhiNotMatchedCount)) {
4325	ST.destroyNewNodes(CommonType);
4326	return nullptr;
4327	}
4328
4329	auto *Result = ST.Get(V: Map.find(Val: Original)->second);
4330	if (Result) {
4331	NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4332	NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4333	}
4334	return Result;
4335	}
4336
4337	/// Try to match PHI node to Candidate.
4338	/// Matcher tracks the matched Phi nodes.
4339	bool MatchPhiNode(PHINode PHI, PHINode Candidate,
4340	SmallSetVector<PHIPair, `8`> &Matcher,
4341	PhiNodeSet &PhiNodesToMatch) {
4342	SmallVector<PHIPair, `8`> WorkList;
4343	Matcher.insert(X: {PHI, Candidate});
4344	SmallSet<PHINode *, `8`> MatchedPHIs;
4345	MatchedPHIs.insert(Ptr: PHI);
4346	WorkList.push_back(Elt: {PHI, Candidate});
4347	SmallSet<PHIPair, `8`> Visited;
4348	while (!WorkList.empty()) {
4349	auto Item = WorkList.pop_back_val();
4350	if (!Visited.insert(V: Item).second)
4351	continue;
4352	// We iterate over all incoming values to Phi to compare them.
4353	// If values are different and both of them Phi and the first one is a
4354	// Phi we added (subject to match) and both of them is in the same basic
4355	// block then we can match our pair if values match. So we state that
4356	// these values match and add it to work list to verify that.
4357	for (auto *B : Item.first->blocks()) {
4358	Value *FirstValue = Item.first->getIncomingValueForBlock(BB: B);
4359	Value *SecondValue = Item.second->getIncomingValueForBlock(BB: B);
4360	if (FirstValue == SecondValue)
4361	continue;
4362
4363	PHINode *FirstPhi = dyn_cast<PHINode>(Val: FirstValue);
4364	PHINode *SecondPhi = dyn_cast<PHINode>(Val: SecondValue);
4365
4366	// One of them is not Phi or
4367	// The first one is not Phi node from the set we'd like to match or
4368	// Phi nodes from different basic blocks then
4369	// we will not be able to match.
4370	if (!FirstPhi \|\| !SecondPhi \|\| !PhiNodesToMatch.count(Ptr: FirstPhi) \|\|
4371	FirstPhi->getParent() != SecondPhi->getParent())
4372	return false;
4373
4374	// If we already matched them then continue.
4375	if (Matcher.count(key: {FirstPhi, SecondPhi}))
4376	continue;
4377	// So the values are different and does not match. So we need them to
4378	// match. (But we register no more than one match per PHI node, so that
4379	// we won't later try to replace them twice.)
4380	if (MatchedPHIs.insert(Ptr: FirstPhi).second)
4381	Matcher.insert(X: {FirstPhi, SecondPhi});
4382	// But me must check it.
4383	WorkList.push_back(Elt: {FirstPhi, SecondPhi});
4384	}
4385	}
4386	return true;
4387	}
4388
4389	/// For the given set of PHI nodes (in the SimplificationTracker) try
4390	/// to find their equivalents.
4391	/// Returns false if this matching fails and creation of new Phi is disabled.
4392	bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4393	unsigned &PhiNotMatchedCount) {
4394	// Matched and PhiNodesToMatch iterate their elements in a deterministic
4395	// order, so the replacements (ReplacePhi) are also done in a deterministic
4396	// order.
4397	SmallSetVector<PHIPair, `8`> Matched;
4398	SmallPtrSet<PHINode *, `8`> WillNotMatch;
4399	PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4400	while (PhiNodesToMatch.size()) {
4401	PHINode PHI = PhiNodesToMatch.begin();
4402
4403	// Add us, if no Phi nodes in the basic block we do not match.
4404	WillNotMatch.clear();
4405	WillNotMatch.insert(Ptr: PHI);
4406
4407	// Traverse all Phis until we found equivalent or fail to do that.
4408	bool IsMatched = false;
4409	for (auto &P : PHI->getParent()->phis()) {
4410	// Skip new Phi nodes.
4411	if (PhiNodesToMatch.count(Ptr: &P))
4412	continue;
4413	if ((IsMatched = MatchPhiNode(PHI, Candidate: &P, Matcher&: Matched, PhiNodesToMatch)))
4414	break;
4415	// If it does not match, collect all Phi nodes from matcher.
4416	// if we end up with no match, them all these Phi nodes will not match
4417	// later.
4418	WillNotMatch.insert_range(R: llvm::make_first_range(c&: Matched));
4419	Matched.clear();
4420	}
4421	if (IsMatched) {
4422	// Replace all matched values and erase them.
4423	for (auto MV : Matched)
4424	ST.ReplacePhi(From: MV.first, To: MV.second);
4425	Matched.clear();
4426	continue;
4427	}
4428	// If we are not allowed to create new nodes then bail out.
4429	if (!AllowNewPhiNodes)
4430	return false;
4431	// Just remove all seen values in matcher. They will not match anything.
4432	PhiNotMatchedCount += WillNotMatch.size();
4433	for (auto *P : WillNotMatch)
4434	PhiNodesToMatch.erase(Ptr: P);
4435	}
4436	return true;
4437	}
4438	/// Fill the placeholders with values from predecessors and simplify them.
4439	void FillPlaceholders(FoldAddrToValueMapping &Map,
4440	SmallVectorImpl<Value *> &TraverseOrder,
4441	SimplificationTracker &ST) {
4442	while (!TraverseOrder.empty()) {
4443	Value *Current = TraverseOrder.pop_back_val();
4444	assert(Map.contains(Current) && "No node to fill!!!");
4445	Value *V = Map [Current];
4446
4447	if (SelectInst *Select = dyn_cast<SelectInst>(Val: V)) {
4448	// CurrentValue also must be Select.
4449	auto *CurrentSelect = cast<SelectInst>(Val: Current);
4450	auto *TrueValue = CurrentSelect->getTrueValue();
4451	assert(Map.contains(TrueValue) && "No True Value!");
4452	Select->setTrueValue(ST.Get(V: Map [TrueValue]));
4453	auto *FalseValue = CurrentSelect->getFalseValue();
4454	assert(Map.contains(FalseValue) && "No False Value!");
4455	Select->setFalseValue(ST.Get(V: Map [FalseValue]));
4456	} else {
4457	// Must be a Phi node then.
4458	auto *PHI = cast<PHINode>(Val: V);
4459	// Fill the Phi node with values from predecessors.
4460	for (auto *B : predecessors(BB: PHI->getParent())) {
4461	Value *PV = cast<PHINode>(Val: Current)->getIncomingValueForBlock(BB: B);
4462	assert(Map.contains(PV) && "No predecessor Value!");
4463	PHI->addIncoming(V: ST.Get(V: Map [PV]), BB: B);
4464	}
4465	}
4466	Map [Current] = ST.Simplify(Val: V);
4467	}
4468	}
4469
4470	/// Starting from original value recursively iterates over def-use chain up to
4471	/// known ending values represented in a map. For each traversed phi/select
4472	/// inserts a placeholder Phi or Select.
4473	/// Reports all new created Phi/Select nodes by adding them to set.
4474	/// Also reports and order in what values have been traversed.
4475	void InsertPlaceholders(FoldAddrToValueMapping &Map,
4476	SmallVectorImpl<Value *> &TraverseOrder,
4477	SimplificationTracker &ST) {
4478	SmallVector<Value *, `32`> Worklist;
4479	assert((isa<PHINode>(Original) \|\| isa<SelectInst>(Original)) &&
4480	"Address must be a Phi or Select node");
4481	auto *Dummy = PoisonValue::get(T: CommonType);
4482	Worklist.push_back(Elt: Original);
4483	while (!Worklist.empty()) {
4484	Value *Current = Worklist.pop_back_val();
4485	// if it is already visited or it is an ending value then skip it.
4486	if (Map.contains(Val: Current))
4487	continue;
4488	TraverseOrder.push_back(Elt: Current);
4489
4490	// CurrentValue must be a Phi node or select. All others must be covered
4491	// by anchors.
4492	if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Val: Current)) {
4493	// Is it OK to get metadata from OrigSelect?!
4494	// Create a Select placeholder with dummy value.
4495	SelectInst *Select =
4496	SelectInst::Create(C: CurrentSelect->getCondition(), S1: Dummy, S2: Dummy,
4497	NameStr: CurrentSelect->getName(),
4498	InsertBefore: CurrentSelect->getIterator(), MDFrom: CurrentSelect);
4499	Map [Current] = Select;
4500	ST.insertNewSelect(SI: Select);
4501	// We are interested in True and False values.
4502	Worklist.push_back(Elt: CurrentSelect->getTrueValue());
4503	Worklist.push_back(Elt: CurrentSelect->getFalseValue());
4504	} else {
4505	// It must be a Phi node then.
4506	PHINode *CurrentPhi = cast<PHINode>(Val: Current);
4507	unsigned PredCount = CurrentPhi->getNumIncomingValues();
4508	PHINode *PHI =
4509	PHINode::Create(Ty: CommonType, NumReservedValues: PredCount, NameStr: "sunk_phi", InsertBefore: CurrentPhi->getIterator());
4510	Map [Current] = PHI;
4511	ST.insertNewPhi(PN: PHI);
4512	append_range(C&: Worklist, R: CurrentPhi->incoming_values());
4513	}
4514	}
4515	}
4516
4517	bool addrModeCombiningAllowed() {
4518	if (DisableComplexAddrModes)
4519	return false;
4520	switch (DifferentField) {
4521	default:
4522	return false;
4523	case ExtAddrMode::BaseRegField:
4524	return AddrSinkCombineBaseReg;
4525	case ExtAddrMode::BaseGVField:
4526	return AddrSinkCombineBaseGV;
4527	case ExtAddrMode::BaseOffsField:
4528	return AddrSinkCombineBaseOffs;
4529	case ExtAddrMode::ScaledRegField:
4530	return AddrSinkCombineScaledReg;
4531	}
4532	}
4533	};
4534	} // end anonymous namespace
4535
4536	/// Try adding ScaleRegScale to the current addressing mode.*
4537	/// Return true and update AddrMode if this addr mode is legal for the target,
4538	/// false if not.
4539	bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4540	unsigned Depth) {
4541	// If Scale is 1, then this is the same as adding ScaleReg to the addressing
4542	// mode. Just process that directly.
4543	if (Scale == `1`)
4544	return matchAddr(Addr: ScaleReg, Depth);
4545
4546	// If the scale is 0, it takes nothing to add this.
4547	if (Scale == `0`)
4548	return true;
4549
4550	// If we already have a scale of this value, we can add to it, otherwise, we
4551	// need an available scale field.
4552	if (AddrMode.Scale != `0` && AddrMode.ScaledReg != ScaleReg)
4553	return false;
4554
4555	ExtAddrMode TestAddrMode = AddrMode;
4556
4557	// Add scale to turn X4+X3 -> X7. This could also do things like*
4558	// [A+B + A7] -> [B+A8].
4559	TestAddrMode.Scale += Scale;
4560	TestAddrMode.ScaledReg = ScaleReg;
4561
4562	// If the new address isn't legal, bail out.
4563	if (!TLI.isLegalAddressingMode(DL, AM: TestAddrMode, Ty: AccessTy, AddrSpace))
4564	return false;
4565
4566	// It was legal, so commit it.
4567	AddrMode = TestAddrMode;
4568
4569	// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4570	// to see if ScaleReg is actually X+C. If so, we can turn this into adding
4571	// XScale + CScale to addr mode. If we found available IV increment, do not
4572	// go any further: we can reuse it and cannot eliminate it.
4573	ConstantInt CI = nullptr*;
4574	Value AddLHS = nullptr*;
4575	if (isa<Instruction>(Val: ScaleReg) && // not a constant expr.
4576	match(V: ScaleReg, P: m_Add(L: m_Value(V&: AddLHS), R: m_ConstantInt(CI))) &&
4577	!isIVIncrement(V: ScaleReg, LI: &LI) && CI->getValue().isSignedIntN(N: `64`)) {
4578	TestAddrMode.InBounds = false;
4579	TestAddrMode.ScaledReg = AddLHS;
4580	TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4581
4582	// If this addressing mode is legal, commit it and remember that we folded
4583	// this instruction.
4584	if (TLI.isLegalAddressingMode(DL, AM: TestAddrMode, Ty: AccessTy, AddrSpace)) {
4585	AddrModeInsts.push_back(Elt: cast<Instruction>(Val: ScaleReg));
4586	AddrMode = TestAddrMode;
4587	return true;
4588	}
4589	// Restore status quo.
4590	TestAddrMode = AddrMode;
4591	}
4592
4593	// If this is an add recurrence with a constant step, return the increment
4594	// instruction and the canonicalized step.
4595	auto GetConstantStep =
4596	[this](const Value V) -> std::optional<std::pair<Instruction , APInt>> {
4597	auto *PN = dyn_cast<PHINode>(Val: V);
4598	if (!PN)
4599	return std::nullopt;
4600	auto IVInc = getIVIncrement(PN, LI: &LI);
4601	if (!IVInc)
4602	return std::nullopt;
4603	// TODO: The result of the intrinsics above is two-complement. However when
4604	// IV inc is expressed as add or sub, iv.next is potentially a poison value.
4605	// If it has nuw or nsw flags, we need to make sure that these flags are
4606	// inferrable at the point of memory instruction. Otherwise we are replacing
4607	// well-defined two-complement computation with poison. Currently, to avoid
4608	// potentially complex analysis needed to prove this, we reject such cases.
4609	if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(Val: IVInc ->first))
4610	if (OIVInc->hasNoSignedWrap() \|\| OIVInc->hasNoUnsignedWrap())
4611	return std::nullopt;
4612	if (auto *ConstantStep = dyn_cast<ConstantInt>(Val: IVInc ->second))
4613	return std::make_pair(x&: IVInc ->first, y: ConstantStep->getValue());
4614	return std::nullopt;
4615	};
4616
4617	// Try to account for the following special case:
4618	// 1. ScaleReg is an inductive variable;
4619	// 2. We use it with non-zero offset;
4620	// 3. IV's increment is available at the point of memory instruction.
4621	//
4622	// In this case, we may reuse the IV increment instead of the IV Phi to
4623	// achieve the following advantages:
4624	// 1. If IV step matches the offset, we will have no need in the offset;
4625	// 2. Even if they don't match, we will reduce the overlap of living IV
4626	// and IV increment, that will potentially lead to better register
4627	// assignment.
4628	if (AddrMode.BaseOffs) {
4629	if (auto IVStep = GetConstantStep (ScaleReg)) {
4630	Instruction *IVInc = IVStep ->first;
4631	// The following assert is important to ensure a lack of infinite loops.
4632	// This transforms is (intentionally) the inverse of the one just above.
4633	// If they don't agree on the definition of an increment, we'd alternate
4634	// back and forth indefinitely.
4635	assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4636	APInt Step = IVStep ->second;
4637	APInt Offset = Step * AddrMode.Scale;
4638	if (Offset.isSignedIntN(N: `64`)) {
4639	TestAddrMode.InBounds = false;
4640	TestAddrMode.ScaledReg = IVInc;
4641	TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4642	// If this addressing mode is legal, commit it..
4643	// (Note that we defer the (expensive) domtree base legality check
4644	// to the very last possible point.)
4645	if (TLI.isLegalAddressingMode(DL, AM: TestAddrMode, Ty: AccessTy, AddrSpace) &&
4646	getDTFn ().dominates(Def: IVInc, User: MemoryInst)) {
4647	AddrModeInsts.push_back(Elt: cast<Instruction>(Val: IVInc));
4648	AddrMode = TestAddrMode;
4649	return true;
4650	}
4651	// Restore status quo.
4652	TestAddrMode = AddrMode;
4653	}
4654	}
4655	}
4656
4657	// Otherwise, just return what we have.
4658	return true;
4659	}
4660
4661	/// This is a little filter, which returns true if an addressing computation
4662	/// involving I might be folded into a load/store accessing it.
4663	/// This doesn't need to be perfect, but needs to accept at least
4664	/// the set of instructions that MatchOperationAddr can.
4665	static bool MightBeFoldableInst(Instruction *I) {
4666	switch (I->getOpcode()) {
4667	case Instruction::BitCast:
4668	case Instruction::AddrSpaceCast:
4669	// Don't touch identity bitcasts.
4670	if (I->getType() == I->getOperand(i: `0`)->getType())
4671	return false;
4672	return I->getType()->isIntOrPtrTy();
4673	case Instruction::PtrToInt:
4674	// PtrToInt is always a noop, as we know that the int type is pointer sized.
4675	return true;
4676	case Instruction::IntToPtr:
4677	// We know the input is intptr_t, so this is foldable.
4678	return true;
4679	case Instruction::Add:
4680	return true;
4681	case Instruction::Mul:
4682	case Instruction::Shl:
4683	// Can only handle XC and X << C.*
4684	return isa<ConstantInt>(Val: I->getOperand(i: `1`));
4685	case Instruction::GetElementPtr:
4686	return true;
4687	default:
4688	return false;
4689	}
4690	}
4691
4692	/// Check whether or not \p Val is a legal instruction for \p TLI.
4693	/// \note \p Val is assumed to be the product of some type promotion.
4694	/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4695	/// to be legal, as the non-promoted value would have had the same state.
4696	static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4697	const DataLayout &DL, Value *Val) {
4698	Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4699	if (!PromotedInst)
4700	return false;
4701	int ISDOpcode = TLI.InstructionOpcodeToISD(Opcode: PromotedInst->getOpcode());
4702	// If the ISDOpcode is undefined, it was undefined before the promotion.
4703	if (!ISDOpcode)
4704	return true;
4705	// Otherwise, check if the promoted instruction is legal or not.
4706	return TLI.isOperationLegalOrCustom(
4707	Op: ISDOpcode, VT: TLI.getValueType(DL, Ty: PromotedInst->getType()));
4708	}
4709
4710	namespace {
4711
4712	/// Hepler class to perform type promotion.
4713	class TypePromotionHelper {
4714	/// Utility function to add a promoted instruction \p ExtOpnd to
4715	/// \p PromotedInsts and record the type of extension we have seen.
4716	static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4717	Instruction ExtOpnd, bool* IsSExt) {
4718	ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4719	auto [It, Inserted] = PromotedInsts.try_emplace(Key: ExtOpnd);
4720	if (!Inserted) {
4721	// If the new extension is same as original, the information in
4722	// PromotedInsts[ExtOpnd] is still correct.
4723	if (It ->second.getInt() == ExtTy)
4724	return;
4725
4726	// Now the new extension is different from old extension, we make
4727	// the type information invalid by setting extension type to
4728	// BothExtension.
4729	ExtTy = BothExtension;
4730	}
4731	It ->second = TypeIsSExt (ExtOpnd->getType(), ExtTy);
4732	}
4733
4734	/// Utility function to query the original type of instruction \p Opnd
4735	/// with a matched extension type. If the extension doesn't match, we
4736	/// cannot use the information we had on the original type.
4737	/// BothExtension doesn't match any extension type.
4738	static const Type getOrigType(const* InstrToOrigTy &PromotedInsts,
4739	Instruction Opnd, bool* IsSExt) {
4740	ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4741	InstrToOrigTy::const_iterator It = PromotedInsts.find(Val: Opnd);
4742	if (It != PromotedInsts.end() && It ->second.getInt() == ExtTy)
4743	return It ->second.getPointer();
4744	return nullptr;
4745	}
4746
4747	/// Utility function to check whether or not a sign or zero extension
4748	/// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4749	/// either using the operands of \p Inst or promoting \p Inst.
4750	/// The type of the extension is defined by \p IsSExt.
4751	/// In other words, check if:
4752	/// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4753	/// #1 Promotion applies:
4754	/// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4755	/// #2 Operand reuses:
4756	/// ext opnd1 to ConsideredExtType.
4757	/// \p PromotedInsts maps the instructions to their type before promotion.
4758	static bool canGetThrough(const Instruction Inst, Type ConsideredExtType,
4759	const InstrToOrigTy &PromotedInsts, bool IsSExt);
4760
4761	/// Utility function to determine if \p OpIdx should be promoted when
4762	/// promoting \p Inst.
4763	static bool shouldExtOperand(const Instruction Inst, int* OpIdx) {
4764	return !(isa<SelectInst>(Val: Inst) && OpIdx == `0`);
4765	}
4766
4767	/// Utility function to promote the operand of \p Ext when this
4768	/// operand is a promotable trunc or sext or zext.
4769	/// \p PromotedInsts maps the instructions to their type before promotion.
4770	/// \p CreatedInstsCost[out] contains the cost of all instructions
4771	/// created to promote the operand of Ext.
4772	/// Newly added extensions are inserted in \p Exts.
4773	/// Newly added truncates are inserted in \p Truncs.
4774	/// Should never be called directly.
4775	/// \return The promoted value which is used instead of Ext.
4776	static Value *promoteOperandForTruncAndAnyExt(
4777	Instruction *Ext, TypePromotionTransaction &TPT,
4778	InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4779	SmallVectorImpl<Instruction > Exts,
4780	SmallVectorImpl<Instruction > Truncs, const TargetLowering &TLI);
4781
4782	/// Utility function to promote the operand of \p Ext when this
4783	/// operand is promotable and is not a supported trunc or sext.
4784	/// \p PromotedInsts maps the instructions to their type before promotion.
4785	/// \p CreatedInstsCost[out] contains the cost of all the instructions
4786	/// created to promote the operand of Ext.
4787	/// Newly added extensions are inserted in \p Exts.
4788	/// Newly added truncates are inserted in \p Truncs.
4789	/// Should never be called directly.
4790	/// \return The promoted value which is used instead of Ext.
4791	static Value promoteOperandForOther(Instruction Ext,
4792	TypePromotionTransaction &TPT,
4793	InstrToOrigTy &PromotedInsts,
4794	unsigned &CreatedInstsCost,
4795	SmallVectorImpl<Instruction > Exts,
4796	SmallVectorImpl<Instruction > Truncs,
4797	const TargetLowering &TLI, bool IsSExt);
4798
4799	/// \see promoteOperandForOther.
4800	static Value *signExtendOperandForOther(
4801	Instruction *Ext, TypePromotionTransaction &TPT,
4802	InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4803	SmallVectorImpl<Instruction > Exts,
4804	SmallVectorImpl<Instruction > Truncs, const TargetLowering &TLI) {
4805	return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4806	Exts, Truncs, TLI, IsSExt: true);
4807	}
4808
4809	/// \see promoteOperandForOther.
4810	static Value *zeroExtendOperandForOther(
4811	Instruction *Ext, TypePromotionTransaction &TPT,
4812	InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4813	SmallVectorImpl<Instruction > Exts,
4814	SmallVectorImpl<Instruction > Truncs, const TargetLowering &TLI) {
4815	return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4816	Exts, Truncs, TLI, IsSExt: false);
4817	}
4818
4819	public:
4820	/// Type for the utility function that promotes the operand of Ext.
4821	using Action = Value ()(Instruction *Ext, TypePromotionTransaction &TPT,
4822	InstrToOrigTy &PromotedInsts,
4823	unsigned &CreatedInstsCost,
4824	SmallVectorImpl<Instruction > Exts,
4825	SmallVectorImpl<Instruction > Truncs,
4826	const TargetLowering &TLI);
4827
4828	/// Given a sign/zero extend instruction \p Ext, return the appropriate
4829	/// action to promote the operand of \p Ext instead of using Ext.
4830	/// \return NULL if no promotable action is possible with the current
4831	/// sign extension.
4832	/// \p InsertedInsts keeps track of all the instructions inserted by the
4833	/// other CodeGenPrepare optimizations. This information is important
4834	/// because we do not want to promote these instructions as CodeGenPrepare
4835	/// will reinsert them later. Thus creating an infinite loop: create/remove.
4836	/// \p PromotedInsts maps the instructions to their type before promotion.
4837	static Action getAction(Instruction Ext, const* SetOfInstrs &InsertedInsts,
4838	const TargetLowering &TLI,
4839	const InstrToOrigTy &PromotedInsts);
4840	};
4841
4842	} // end anonymous namespace
4843
4844	bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4845	Type *ConsideredExtType,
4846	const InstrToOrigTy &PromotedInsts,
4847	bool IsSExt) {
4848	// The promotion helper does not know how to deal with vector types yet.
4849	// To be able to fix that, we would need to fix the places where we
4850	// statically extend, e.g., constants and such.
4851	if (Inst->getType()->isVectorTy())
4852	return false;
4853
4854	// We can always get through zext.
4855	if (isa<ZExtInst>(Val: Inst))
4856	return true;
4857
4858	// sext(sext) is ok too.
4859	if (IsSExt && isa<SExtInst>(Val: Inst))
4860	return true;
4861
4862	// We can get through binary operator, if it is legal. In other words, the
4863	// binary operator must have a nuw or nsw flag.
4864	if (const auto *BinOp = dyn_cast<BinaryOperator>(Val: Inst))
4865	if (isa<OverflowingBinaryOperator>(Val: BinOp) &&
4866	((!IsSExt && BinOp->hasNoUnsignedWrap()) \|\|
4867	(IsSExt && BinOp->hasNoSignedWrap())))
4868	return true;
4869
4870	// ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4871	if ((Inst->getOpcode() == Instruction::And \|\|
4872	Inst->getOpcode() == Instruction::Or))
4873	return true;
4874
4875	// ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4876	if (Inst->getOpcode() == Instruction::Xor) {
4877	// Make sure it is not a NOT.
4878	if (const auto *Cst = dyn_cast<ConstantInt>(Val: Inst->getOperand(i: `1`)))
4879	if (!Cst->getValue().isAllOnes())
4880	return true;
4881	}
4882
4883	// zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4884	// It may change a poisoned value into a regular value, like
4885	// zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4886	// poisoned value regular value
4887	// It should be OK since undef covers valid value.
4888	if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4889	return true;
4890
4891	// and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4892	// It may change a poisoned value into a regular value, like
4893	// zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4894	// poisoned value regular value
4895	// It should be OK since undef covers valid value.
4896	if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4897	const auto ExtInst = cast<const* Instruction>(Val: *Inst->user_begin());
4898	if (ExtInst->hasOneUse()) {
4899	const auto AndInst = dyn_cast<const* Instruction>(Val: *ExtInst->user_begin());
4900	if (AndInst && AndInst->getOpcode() == Instruction::And) {
4901	const auto *Cst = dyn_cast<ConstantInt>(Val: AndInst->getOperand(i: `1`));
4902	if (Cst &&
4903	Cst->getValue().isIntN(N: Inst->getType()->getIntegerBitWidth()))
4904	return true;
4905	}
4906	}
4907	}
4908
4909	// Check if we can do the following simplification.
4910	// ext(trunc(opnd)) --> ext(opnd)
4911	if (!isa<TruncInst>(Val: Inst))
4912	return false;
4913
4914	Value *OpndVal = Inst->getOperand(i: `0`);
4915	// Check if we can use this operand in the extension.
4916	// If the type is larger than the result type of the extension, we cannot.
4917	if (!OpndVal->getType()->isIntegerTy() \|\|
4918	OpndVal->getType()->getIntegerBitWidth() >
4919	ConsideredExtType->getIntegerBitWidth())
4920	return false;
4921
4922	// If the operand of the truncate is not an instruction, we will not have
4923	// any information on the dropped bits.
4924	// (Actually we could for constant but it is not worth the extra logic).
4925	Instruction *Opnd = dyn_cast<Instruction>(Val: OpndVal);
4926	if (!Opnd)
4927	return false;
4928
4929	// Check if the source of the type is narrow enough.
4930	// I.e., check that trunc just drops extended bits of the same kind of
4931	// the extension.
4932	// #1 get the type of the operand and check the kind of the extended bits.
4933	const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4934	if (OpndType)
4935	;
4936	else if ((IsSExt && isa<SExtInst>(Val: Opnd)) \|\| (!IsSExt && isa<ZExtInst>(Val: Opnd)))
4937	OpndType = Opnd->getOperand(i: `0`)->getType();
4938	else
4939	return false;
4940
4941	// #2 check that the truncate just drops extended bits.
4942	return Inst->getType()->getIntegerBitWidth() >=
4943	OpndType->getIntegerBitWidth();
4944	}
4945
4946	TypePromotionHelper::Action TypePromotionHelper::getAction(
4947	Instruction Ext, const* SetOfInstrs &InsertedInsts,
4948	const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4949	assert((isa<SExtInst>(Ext) \|\| isa<ZExtInst>(Ext)) &&
4950	"Unexpected instruction type");
4951	Instruction *ExtOpnd = dyn_cast<Instruction>(Val: Ext->getOperand(i: `0`));
4952	Type *ExtTy = Ext->getType();
4953	bool IsSExt = isa<SExtInst>(Val: Ext);
4954	// If the operand of the extension is not an instruction, we cannot
4955	// get through.
4956	// If it, check we can get through.
4957	if (!ExtOpnd \|\| !canGetThrough(Inst: ExtOpnd, ConsideredExtType: ExtTy, PromotedInsts, IsSExt))
4958	return nullptr;
4959
4960	// Do not promote if the operand has been added by codegenprepare.
4961	// Otherwise, it means we are undoing an optimization that is likely to be
4962	// redone, thus causing potential infinite loop.
4963	if (isa<TruncInst>(Val: ExtOpnd) && InsertedInsts.count(Ptr: ExtOpnd))
4964	return nullptr;
4965
4966	// SExt or Trunc instructions.
4967	// Return the related handler.
4968	if (isa<SExtInst>(Val: ExtOpnd) \|\| isa<TruncInst>(Val: ExtOpnd) \|\|
4969	isa<ZExtInst>(Val: ExtOpnd))
4970	return promoteOperandForTruncAndAnyExt;
4971
4972	// Regular instruction.
4973	// Abort early if we will have to insert non-free instructions.
4974	if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(FromTy: ExtTy, ToTy: ExtOpnd->getType()))
4975	return nullptr;
4976	return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4977	}
4978
4979	Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4980	Instruction *SExt, TypePromotionTransaction &TPT,
4981	InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4982	SmallVectorImpl<Instruction > Exts,
4983	SmallVectorImpl<Instruction > Truncs, const TargetLowering &TLI) {
4984	// By construction, the operand of SExt is an instruction. Otherwise we cannot
4985	// get through it and this method should not be called.
4986	Instruction *SExtOpnd = cast<Instruction>(Val: SExt->getOperand(i: `0`));
4987	Value *ExtVal = SExt;
4988	bool HasMergedNonFreeExt = false;
4989	if (isa<ZExtInst>(Val: SExtOpnd)) {
4990	// Replace s\|zext(zext(opnd))
4991	// => zext(opnd).
4992	HasMergedNonFreeExt = !TLI.isExtFree(I: SExtOpnd);
4993	Value *ZExt =
4994	TPT.createZExt(Inst: SExt, Opnd: SExtOpnd->getOperand(i: `0`), Ty: SExt->getType());
4995	TPT.replaceAllUsesWith(Inst: SExt, New: ZExt);
4996	TPT.eraseInstruction(Inst: SExt);
4997	ExtVal = ZExt;
4998	} else {
4999	// Replace z\|sext(trunc(opnd)) or sext(sext(opnd))
5000	// => z\|sext(opnd).
5001	TPT.setOperand(Inst: SExt, Idx: `0`, NewVal: SExtOpnd->getOperand(i: `0`));
5002	}
5003	CreatedInstsCost = `0`;
5004
5005	// Remove dead code.
5006	if (SExtOpnd->use_empty())
5007	TPT.eraseInstruction(Inst: SExtOpnd);
5008
5009	// Check if the extension is still needed.
5010	Instruction *ExtInst = dyn_cast<Instruction>(Val: ExtVal);
5011	if (!ExtInst \|\| ExtInst->getType() != ExtInst->getOperand(i: `0`)->getType()) {
5012	if (ExtInst) {
5013	if (Exts)
5014	Exts->push_back(Elt: ExtInst);
5015	CreatedInstsCost = !TLI.isExtFree(I: ExtInst) && !HasMergedNonFreeExt;
5016	}
5017	return ExtVal;
5018	}
5019
5020	// At this point we have: ext ty opnd to ty.
5021	// Reassign the uses of ExtInst to the opnd and remove ExtInst.
5022	Value *NextVal = ExtInst->getOperand(i: `0`);
5023	TPT.eraseInstruction(Inst: ExtInst, NewVal: NextVal);
5024	return NextVal;
5025	}
5026
5027	Value *TypePromotionHelper::promoteOperandForOther(
5028	Instruction *Ext, TypePromotionTransaction &TPT,
5029	InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5030	SmallVectorImpl<Instruction > Exts,
5031	SmallVectorImpl<Instruction > Truncs, const TargetLowering &TLI,
5032	bool IsSExt) {
5033	// By construction, the operand of Ext is an instruction. Otherwise we cannot
5034	// get through it and this method should not be called.
5035	Instruction *ExtOpnd = cast<Instruction>(Val: Ext->getOperand(i: `0`));
5036	CreatedInstsCost = `0`;
5037	if (!ExtOpnd->hasOneUse()) {
5038	// ExtOpnd will be promoted.
5039	// All its uses, but Ext, will need to use a truncated value of the
5040	// promoted version.
5041	// Create the truncate now.
5042	Value *Trunc = TPT.createTrunc(Opnd: Ext, Ty: ExtOpnd->getType());
5043	if (Instruction *ITrunc = dyn_cast<Instruction>(Val: Trunc)) {
5044	// Insert it just after the definition.
5045	ITrunc->moveAfter(MovePos: ExtOpnd);
5046	if (Truncs)
5047	Truncs->push_back(Elt: ITrunc);
5048	}
5049
5050	TPT.replaceAllUsesWith(Inst: ExtOpnd, New: Trunc);
5051	// Restore the operand of Ext (which has been replaced by the previous call
5052	// to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5053	TPT.setOperand(Inst: Ext, Idx: `0`, NewVal: ExtOpnd);
5054	}
5055
5056	// Get through the Instruction:
5057	// 1. Update its type.
5058	// 2. Replace the uses of Ext by Inst.
5059	// 3. Extend each operand that needs to be extended.
5060
5061	// Remember the original type of the instruction before promotion.
5062	// This is useful to know that the high bits are sign extended bits.
5063	addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5064	// Step #1.
5065	TPT.mutateType(Inst: ExtOpnd, NewTy: Ext->getType());
5066	// Step #2.
5067	TPT.replaceAllUsesWith(Inst: Ext, New: ExtOpnd);
5068	// Step #3.
5069	LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5070	for (int OpIdx = `0`, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5071	++OpIdx) {
5072	LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << `'\n'`);
5073	if (ExtOpnd->getOperand(i: OpIdx)->getType() == Ext->getType() \|\|
5074	!shouldExtOperand(Inst: ExtOpnd, OpIdx)) {
5075	LLVM_DEBUG(dbgs() << "No need to propagate\n");
5076	continue;
5077	}
5078	// Check if we can statically extend the operand.
5079	Value *Opnd = ExtOpnd->getOperand(i: OpIdx);
5080	if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Val: Opnd)) {
5081	LLVM_DEBUG(dbgs() << "Statically extend\n");
5082	unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5083	APInt CstVal = IsSExt ? Cst->getValue().sext(width: BitWidth)
5084	: Cst->getValue().zext(width: BitWidth);
5085	TPT.setOperand(Inst: ExtOpnd, Idx: OpIdx, NewVal: ConstantInt::get(Ty: Ext->getType(), V: CstVal));
5086	continue;
5087	}
5088	// UndefValue are typed, so we have to statically sign extend them.
5089	if (isa<UndefValue>(Val: Opnd)) {
5090	LLVM_DEBUG(dbgs() << "Statically extend\n");
5091	TPT.setOperand(Inst: ExtOpnd, Idx: OpIdx, NewVal: UndefValue::get(T: Ext->getType()));
5092	continue;
5093	}
5094
5095	// Otherwise we have to explicitly sign extend the operand.
5096	Value *ValForExtOpnd = IsSExt
5097	? TPT.createSExt(Inst: ExtOpnd, Opnd, Ty: Ext->getType())
5098	: TPT.createZExt(Inst: ExtOpnd, Opnd, Ty: Ext->getType());
5099	TPT.setOperand(Inst: ExtOpnd, Idx: OpIdx, NewVal: ValForExtOpnd);
5100	Instruction *InstForExtOpnd = dyn_cast<Instruction>(Val: ValForExtOpnd);
5101	if (!InstForExtOpnd)
5102	continue;
5103
5104	if (Exts)
5105	Exts->push_back(Elt: InstForExtOpnd);
5106
5107	CreatedInstsCost += !TLI.isExtFree(I: InstForExtOpnd);
5108	}
5109	LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5110	TPT.eraseInstruction(Inst: Ext);
5111	return ExtOpnd;
5112	}
5113
5114	/// Check whether or not promoting an instruction to a wider type is profitable.
5115	/// \p NewCost gives the cost of extension instructions created by the
5116	/// promotion.
5117	/// \p OldCost gives the cost of extension instructions before the promotion
5118	/// plus the number of instructions that have been
5119	/// matched in the addressing mode the promotion.
5120	/// \p PromotedOperand is the value that has been promoted.
5121	/// \return True if the promotion is profitable, false otherwise.
5122	bool AddressingModeMatcher::isPromotionProfitable(
5123	unsigned NewCost, unsigned OldCost, Value PromotedOperand) const* {
5124	LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5125	<< `'\n'`);
5126	// The cost of the new extensions is greater than the cost of the
5127	// old extension plus what we folded.
5128	// This is not profitable.
5129	if (NewCost > OldCost)
5130	return false;
5131	if (NewCost < OldCost)
5132	return true;
5133	// The promotion is neutral but it may help folding the sign extension in
5134	// loads for instance.
5135	// Check that we did not create an illegal instruction.
5136	return isPromotedInstructionLegal(TLI, DL, Val: PromotedOperand);
5137	}
5138
5139	/// Given an instruction or constant expr, see if we can fold the operation
5140	/// into the addressing mode. If so, update the addressing mode and return
5141	/// true, otherwise return false without modifying AddrMode.
5142	/// If \p MovedAway is not NULL, it contains the information of whether or
5143	/// not AddrInst has to be folded into the addressing mode on success.
5144	/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5145	/// because it has been moved away.
5146	/// Thus AddrInst must not be added in the matched instructions.
5147	/// This state can happen when AddrInst is a sext, since it may be moved away.
5148	/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5149	/// not be referenced anymore.
5150	bool AddressingModeMatcher::matchOperationAddr(User AddrInst, unsigned* Opcode,
5151	unsigned Depth,
5152	bool *MovedAway) {
5153	// Avoid exponential behavior on extremely deep expression trees.
5154	if (Depth >= `5`)
5155	return false;
5156
5157	// By default, all matched instructions stay in place.
5158	if (MovedAway)
5159	MovedAway = false*;
5160
5161	switch (Opcode) {
5162	case Instruction::PtrToInt:
5163	// PtrToInt is always a noop, as we know that the int type is pointer sized.
5164	return matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth);
5165	case Instruction::IntToPtr: {
5166	auto AS = AddrInst->getType()->getPointerAddressSpace();
5167	auto PtrTy = MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS));
5168	// This inttoptr is a no-op if the integer type is pointer sized.
5169	if (TLI.getValueType(DL, Ty: AddrInst->getOperand(i: `0`)->getType()) == PtrTy)
5170	return matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth);
5171	return false;
5172	}
5173	case Instruction::BitCast:
5174	// BitCast is always a noop, and we can handle it as long as it is
5175	// int->int or pointer->pointer (we don't want int<->fp or something).
5176	if (AddrInst->getOperand(i: `0`)->getType()->isIntOrPtrTy() &&
5177	// Don't touch identity bitcasts. These were probably put here by LSR,
5178	// and we don't want to mess around with them. Assume it knows what it
5179	// is doing.
5180	AddrInst->getOperand(i: `0`)->getType() != AddrInst->getType())
5181	return matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth);
5182	return false;
5183	case Instruction::AddrSpaceCast: {
5184	unsigned SrcAS =
5185	AddrInst->getOperand(i: `0`)->getType()->getPointerAddressSpace();
5186	unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5187	if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5188	return matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth);
5189	return false;
5190	}
5191	case Instruction::Add: {
5192	// Check to see if we can merge in one operand, then the other. If so, we
5193	// win.
5194	ExtAddrMode BackupAddrMode = AddrMode;
5195	unsigned OldSize = AddrModeInsts.size();
5196	// Start a transaction at this point.
5197	// The LHS may match but not the RHS.
5198	// Therefore, we need a higher level restoration point to undo partially
5199	// matched operation.
5200	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5201	TPT.getRestorationPoint();
5202
5203	// Try to match an integer constant second to increase its chance of ending
5204	// up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5205	int First = `0`, Second = `1`;
5206	if (isa<ConstantInt>(Val: AddrInst->getOperand(i: First))
5207	&& !isa<ConstantInt>(Val: AddrInst->getOperand(i: Second)))
5208	std::swap(a&: First, b&: Second);
5209	AddrMode.InBounds = false;
5210	if (matchAddr(Addr: AddrInst->getOperand(i: First), Depth: Depth + `1`) &&
5211	matchAddr(Addr: AddrInst->getOperand(i: Second), Depth: Depth + `1`))
5212	return true;
5213
5214	// Restore the old addr mode info.
5215	AddrMode = BackupAddrMode;
5216	AddrModeInsts.resize(N: OldSize);
5217	TPT.rollback(Point: LastKnownGood);
5218
5219	// Otherwise this was over-aggressive. Try merging operands in the opposite
5220	// order.
5221	if (matchAddr(Addr: AddrInst->getOperand(i: Second), Depth: Depth + `1`) &&
5222	matchAddr(Addr: AddrInst->getOperand(i: First), Depth: Depth + `1`))
5223	return true;
5224
5225	// Otherwise we definitely can't merge the ADD in.
5226	AddrMode = BackupAddrMode;
5227	AddrModeInsts.resize(N: OldSize);
5228	TPT.rollback(Point: LastKnownGood);
5229	break;
5230	}
5231	// case Instruction::Or:
5232	// TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5233	// break;
5234	case Instruction::Mul:
5235	case Instruction::Shl: {
5236	// Can only handle XC and X << C.*
5237	AddrMode.InBounds = false;
5238	ConstantInt *RHS = dyn_cast<ConstantInt>(Val: AddrInst->getOperand(i: `1`));
5239	if (!RHS \|\| RHS->getBitWidth() > `64`)
5240	return false;
5241	int64_t Scale = Opcode == Instruction::Shl
5242	? `1LL` << RHS->getLimitedValue(Limit: RHS->getBitWidth() - `1`)
5243	: RHS->getSExtValue();
5244
5245	return matchScaledValue(ScaleReg: AddrInst->getOperand(i: `0`), Scale, Depth);
5246	}
5247	case Instruction::GetElementPtr: {
5248	// Scan the GEP. We check it if it contains constant offsets and at most
5249	// one variable offset.
5250	int VariableOperand = -`1`;
5251	unsigned VariableScale = `0`;
5252
5253	int64_t ConstantOffset = `0`;
5254	gep_type_iterator GTI = gep_type_begin(GEP: AddrInst);
5255	for (unsigned i = `1`, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5256	if (StructType *STy = GTI.getStructTypeOrNull()) {
5257	const StructLayout *SL = DL.getStructLayout(Ty: STy);
5258	unsigned Idx =
5259	cast<ConstantInt>(Val: AddrInst->getOperand(i))->getZExtValue();
5260	ConstantOffset += SL->getElementOffset(Idx);
5261	} else {
5262	TypeSize TS = GTI.getSequentialElementStride(DL);
5263	if (TS.isNonZero()) {
5264	// The optimisations below currently only work for fixed offsets.
5265	if (TS.isScalable())
5266	return false;
5267	int64_t TypeSize = TS.getFixedValue();
5268	if (ConstantInt *CI =
5269	dyn_cast<ConstantInt>(Val: AddrInst->getOperand(i))) {
5270	const APInt &CVal = CI->getValue();
5271	if (CVal.getSignificantBits() <= `64`) {
5272	ConstantOffset += CVal.getSExtValue() * TypeSize;
5273	continue;
5274	}
5275	}
5276	// We only allow one variable index at the moment.
5277	if (VariableOperand != -`1`)
5278	return false;
5279
5280	// Remember the variable index.
5281	VariableOperand = i;
5282	VariableScale = TypeSize;
5283	}
5284	}
5285	}
5286
5287	// A common case is for the GEP to only do a constant offset. In this case,
5288	// just add it to the disp field and check validity.
5289	if (VariableOperand == -`1`) {
5290	AddrMode.BaseOffs += ConstantOffset;
5291	if (matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth: Depth + `1`)) {
5292	if (!cast<GEPOperator>(Val: AddrInst)->isInBounds())
5293	AddrMode.InBounds = false;
5294	return true;
5295	}
5296	AddrMode.BaseOffs -= ConstantOffset;
5297
5298	if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(Val: AddrInst) &&
5299	TLI.shouldConsiderGEPOffsetSplit() && Depth == `0` &&
5300	ConstantOffset > `0`) {
5301	// Record GEPs with non-zero offsets as candidates for splitting in
5302	// the event that the offset cannot fit into the r+i addressing mode.
5303	// Simple and common case that only one GEP is used in calculating the
5304	// address for the memory access.
5305	Value *Base = AddrInst->getOperand(i: `0`);
5306	auto *BaseI = dyn_cast<Instruction>(Val: Base);
5307	auto *GEP = cast<GetElementPtrInst>(Val: AddrInst);
5308	if (isa<Argument>(Val: Base) \|\| isa<GlobalValue>(Val: Base) \|\|
5309	(BaseI && !isa<CastInst>(Val: BaseI) &&
5310	!isa<GetElementPtrInst>(Val: BaseI))) {
5311	// Make sure the parent block allows inserting non-PHI instructions
5312	// before the terminator.
5313	BasicBlock *Parent = BaseI ? BaseI->getParent()
5314	: &GEP->getFunction()->getEntryBlock();
5315	if (!Parent->getTerminator()->isEHPad())
5316	LargeOffsetGEP = std::make_pair(x&: GEP, y&: ConstantOffset);
5317	}
5318	}
5319
5320	return false;
5321	}
5322
5323	// Save the valid addressing mode in case we can't match.
5324	ExtAddrMode BackupAddrMode = AddrMode;
5325	unsigned OldSize = AddrModeInsts.size();
5326
5327	// See if the scale and offset amount is valid for this target.
5328	AddrMode.BaseOffs += ConstantOffset;
5329	if (!cast<GEPOperator>(Val: AddrInst)->isInBounds())
5330	AddrMode.InBounds = false;
5331
5332	// Match the base operand of the GEP.
5333	if (!matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth: Depth + `1`)) {
5334	// If it couldn't be matched, just stuff the value in a register.
5335	if (AddrMode.HasBaseReg) {
5336	AddrMode = BackupAddrMode;
5337	AddrModeInsts.resize(N: OldSize);
5338	return false;
5339	}
5340	AddrMode.HasBaseReg = true;
5341	AddrMode.BaseReg = AddrInst->getOperand(i: `0`);
5342	}
5343
5344	// Match the remaining variable portion of the GEP.
5345	if (!matchScaledValue(ScaleReg: AddrInst->getOperand(i: VariableOperand), Scale: VariableScale,
5346	Depth)) {
5347	// If it couldn't be matched, try stuffing the base into a register
5348	// instead of matching it, and retrying the match of the scale.
5349	AddrMode = BackupAddrMode;
5350	AddrModeInsts.resize(N: OldSize);
5351	if (AddrMode.HasBaseReg)
5352	return false;
5353	AddrMode.HasBaseReg = true;
5354	AddrMode.BaseReg = AddrInst->getOperand(i: `0`);
5355	AddrMode.BaseOffs += ConstantOffset;
5356	if (!matchScaledValue(ScaleReg: AddrInst->getOperand(i: VariableOperand),
5357	Scale: VariableScale, Depth)) {
5358	// If even that didn't work, bail.
5359	AddrMode = BackupAddrMode;
5360	AddrModeInsts.resize(N: OldSize);
5361	return false;
5362	}
5363	}
5364
5365	return true;
5366	}
5367	case Instruction::SExt:
5368	case Instruction::ZExt: {
5369	Instruction *Ext = dyn_cast<Instruction>(Val: AddrInst);
5370	if (!Ext)
5371	return false;
5372
5373	// Try to move this ext out of the way of the addressing mode.
5374	// Ask for a method for doing so.
5375	TypePromotionHelper::Action TPH =
5376	TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5377	if (!TPH)
5378	return false;
5379
5380	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5381	TPT.getRestorationPoint();
5382	unsigned CreatedInstsCost = `0`;
5383	unsigned ExtCost = !TLI.isExtFree(I: Ext);
5384	Value *PromotedOperand =
5385	TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5386	// SExt has been moved away.
5387	// Thus either it will be rematched later in the recursive calls or it is
5388	// gone. Anyway, we must not fold it into the addressing mode at this point.
5389	// E.g.,
5390	// op = add opnd, 1
5391	// idx = ext op
5392	// addr = gep base, idx
5393	// is now:
5394	// promotedOpnd = ext opnd <- no match here
5395	// op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5396	// addr = gep base, op <- match
5397	if (MovedAway)
5398	MovedAway = true*;
5399
5400	assert(PromotedOperand &&
5401	"TypePromotionHelper should have filtered out those cases");
5402
5403	ExtAddrMode BackupAddrMode = AddrMode;
5404	unsigned OldSize = AddrModeInsts.size();
5405
5406	if (!matchAddr(Addr: PromotedOperand, Depth) \|\|
5407	// The total of the new cost is equal to the cost of the created
5408	// instructions.
5409	// The total of the old cost is equal to the cost of the extension plus
5410	// what we have saved in the addressing mode.
5411	!isPromotionProfitable(NewCost: CreatedInstsCost,
5412	OldCost: ExtCost + (AddrModeInsts.size() - OldSize),
5413	PromotedOperand)) {
5414	AddrMode = BackupAddrMode;
5415	AddrModeInsts.resize(N: OldSize);
5416	LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5417	TPT.rollback(Point: LastKnownGood);
5418	return false;
5419	}
5420
5421	// SExt has been deleted. Make sure it is not referenced by the AddrMode.
5422	AddrMode.replaceWith(From: Ext, To: PromotedOperand);
5423	return true;
5424	}
5425	case Instruction::Call:
5426	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: AddrInst)) {
5427	if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5428	GlobalValue &GV = cast<GlobalValue>(Val&: *II->getArgOperand(i: `0`));
5429	if (TLI.addressingModeSupportsTLS(GV))
5430	return matchAddr(Addr: AddrInst->getOperand(i: `0`), Depth);
5431	}
5432	}
5433	break;
5434	}
5435	return false;
5436	}
5437
5438	/// If we can, try to add the value of 'Addr' into the current addressing mode.
5439	/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5440	/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5441	/// for the target.
5442	///
5443	bool AddressingModeMatcher::matchAddr(Value Addr, unsigned* Depth) {
5444	// Start a transaction at this point that we will rollback if the matching
5445	// fails.
5446	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5447	TPT.getRestorationPoint();
5448	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Addr)) {
5449	if (CI->getValue().isSignedIntN(N: `64`)) {
5450	// Check if the addition would result in a signed overflow.
5451	int64_t Result;
5452	bool Overflow =
5453	AddOverflow(X: AddrMode.BaseOffs, Y: CI->getSExtValue(), Result);
5454	if (!Overflow) {
5455	// Fold in immediates if legal for the target.
5456	AddrMode.BaseOffs = Result;
5457	if (TLI.isLegalAddressingMode(DL, AM: AddrMode, Ty: AccessTy, AddrSpace))
5458	return true;
5459	AddrMode.BaseOffs -= CI->getSExtValue();
5460	}
5461	}
5462	} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val: Addr)) {
5463	// If this is a global variable, try to fold it into the addressing mode.
5464	if (!AddrMode.BaseGV) {
5465	AddrMode.BaseGV = GV;
5466	if (TLI.isLegalAddressingMode(DL, AM: AddrMode, Ty: AccessTy, AddrSpace))
5467	return true;
5468	AddrMode.BaseGV = nullptr;
5469	}
5470	} else if (Instruction *I = dyn_cast<Instruction>(Val: Addr)) {
5471	ExtAddrMode BackupAddrMode = AddrMode;
5472	unsigned OldSize = AddrModeInsts.size();
5473
5474	// Check to see if it is possible to fold this operation.
5475	bool MovedAway = false;
5476	if (matchOperationAddr(AddrInst: I, Opcode: I->getOpcode(), Depth, MovedAway: &MovedAway)) {
5477	// This instruction may have been moved away. If so, there is nothing
5478	// to check here.
5479	if (MovedAway)
5480	return true;
5481	// Okay, it's possible to fold this. Check to see if it is actually
5482	// profitable* to do so. We use a simple cost model to avoid increasing*
5483	// register pressure too much.
5484	if (I->hasOneUse() \|\|
5485	isProfitableToFoldIntoAddressingMode(I, AMBefore&: BackupAddrMode, AMAfter&: AddrMode)) {
5486	AddrModeInsts.push_back(Elt: I);
5487	return true;
5488	}
5489
5490	// It isn't profitable to do this, roll back.
5491	AddrMode = BackupAddrMode;
5492	AddrModeInsts.resize(N: OldSize);
5493	TPT.rollback(Point: LastKnownGood);
5494	}
5495	} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: Addr)) {
5496	if (matchOperationAddr(AddrInst: CE, Opcode: CE->getOpcode(), Depth))
5497	return true;
5498	TPT.rollback(Point: LastKnownGood);
5499	} else if (isa<ConstantPointerNull>(Val: Addr)) {
5500	// Null pointer gets folded without affecting the addressing mode.
5501	return true;
5502	}
5503
5504	// Worse case, the target should support [reg] addressing modes. :)
5505	if (!AddrMode.HasBaseReg) {
5506	AddrMode.HasBaseReg = true;
5507	AddrMode.BaseReg = Addr;
5508	// Still check for legality in case the target supports [imm] but not [i+r].
5509	if (TLI.isLegalAddressingMode(DL, AM: AddrMode, Ty: AccessTy, AddrSpace))
5510	return true;
5511	AddrMode.HasBaseReg = false;
5512	AddrMode.BaseReg = nullptr;
5513	}
5514
5515	// If the base register is already taken, see if we can do [r+r].
5516	if (AddrMode.Scale == `0`) {
5517	AddrMode.Scale = `1`;
5518	AddrMode.ScaledReg = Addr;
5519	if (TLI.isLegalAddressingMode(DL, AM: AddrMode, Ty: AccessTy, AddrSpace))
5520	return true;
5521	AddrMode.Scale = `0`;
5522	AddrMode.ScaledReg = nullptr;
5523	}
5524	// Couldn't match.
5525	TPT.rollback(Point: LastKnownGood);
5526	return false;
5527	}
5528
5529	/// Check to see if all uses of OpVal by the specified inline asm call are due
5530	/// to memory operands. If so, return true, otherwise return false.
5531	static bool IsOperandAMemoryOperand(CallInst CI, InlineAsm IA, Value *OpVal,
5532	const TargetLowering &TLI,
5533	const TargetRegisterInfo &TRI) {
5534	const Function *F = CI->getFunction();
5535	TargetLowering::AsmOperandInfoVector TargetConstraints =
5536	TLI.ParseConstraints(DL: F->getDataLayout(), TRI: &TRI, Call: *CI);
5537
5538	for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5539	// Compute the constraint code and ConstraintType to use.
5540	TLI.ComputeConstraintToUse(OpInfo, Op: SDValue ());
5541
5542	// If this asm operand is our Value, and if it isn't an indirect memory*
5543	// operand, we can't fold it! TODO: Also handle C_Address?
5544	if (OpInfo.CallOperandVal == OpVal &&
5545	(OpInfo.ConstraintType != TargetLowering::C_Memory \|\|
5546	!OpInfo.isIndirect))
5547	return false;
5548	}
5549
5550	return true;
5551	}
5552
5553	/// Recursively walk all the uses of I until we find a memory use.
5554	/// If we find an obviously non-foldable instruction, return true.
5555	/// Add accessed addresses and types to MemoryUses.
5556	static bool FindAllMemoryUses(
5557	Instruction I, SmallVectorImpl<std::pair<Use , Type *>> &MemoryUses,
5558	SmallPtrSetImpl<Instruction > &ConsideredInsts, const* TargetLowering &TLI,
5559	const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5560	BlockFrequencyInfo BFI, unsigned* &SeenInsts) {
5561	// If we already considered this instruction, we're done.
5562	if (!ConsideredInsts.insert(Ptr: I).second)
5563	return false;
5564
5565	// If this is an obviously unfoldable instruction, bail out.
5566	if (!MightBeFoldableInst(I))
5567	return true;
5568
5569	// Loop over all the uses, recursively processing them.
5570	for (Use &U : I->uses()) {
5571	// Conservatively return true if we're seeing a large number or a deep chain
5572	// of users. This avoids excessive compilation times in pathological cases.
5573	if (SeenInsts++ >= MaxAddressUsersToScan)
5574	return true;
5575
5576	Instruction *UserI = cast<Instruction>(Val: U.getUser());
5577	if (LoadInst *LI = dyn_cast<LoadInst>(Val: UserI)) {
5578	MemoryUses.push_back(Elt: {&U, LI->getType()});
5579	continue;
5580	}
5581
5582	if (StoreInst *SI = dyn_cast<StoreInst>(Val: UserI)) {
5583	if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5584	return true; // Storing addr, not into addr.
5585	MemoryUses.push_back(Elt: {&U, SI->getValueOperand()->getType()});
5586	continue;
5587	}
5588
5589	if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: UserI)) {
5590	if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5591	return true; // Storing addr, not into addr.
5592	MemoryUses.push_back(Elt: {&U, RMW->getValOperand()->getType()});
5593	continue;
5594	}
5595
5596	if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: UserI)) {
5597	if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5598	return true; // Storing addr, not into addr.
5599	MemoryUses.push_back(Elt: {&U, CmpX->getCompareOperand()->getType()});
5600	continue;
5601	}
5602
5603	if (CallInst *CI = dyn_cast<CallInst>(Val: UserI)) {
5604	if (CI->hasFnAttr(Attribute::Cold)) {
5605	// If this is a cold call, we can sink the addressing calculation into
5606	// the cold path. See optimizeCallInst
5607	if (!llvm::shouldOptimizeForSize(BB: CI->getParent(), PSI, BFI))
5608	continue;
5609	}
5610
5611	InlineAsm *IA = dyn_cast<InlineAsm>(Val: CI->getCalledOperand());
5612	if (!IA)
5613	return true;
5614
5615	// If this is a memory operand, we're cool, otherwise bail out.
5616	if (!IsOperandAMemoryOperand(CI, IA, OpVal: I, TLI, TRI))
5617	return true;
5618	continue;
5619	}
5620
5621	if (FindAllMemoryUses(I: UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5622	PSI, BFI, SeenInsts))
5623	return true;
5624	}
5625
5626	return false;
5627	}
5628
5629	static bool FindAllMemoryUses(
5630	Instruction I, SmallVectorImpl<std::pair<Use , Type *>> &MemoryUses,
5631	const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5632	ProfileSummaryInfo PSI, BlockFrequencyInfo BFI) {
5633	unsigned SeenInsts = `0`;
5634	SmallPtrSet<Instruction *, `16`> ConsideredInsts;
5635	return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5636	PSI, BFI, SeenInsts);
5637	}
5638
5639
5640	/// Return true if Val is already known to be live at the use site that we're
5641	/// folding it into. If so, there is no cost to include it in the addressing
5642	/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5643	/// instruction already.
5644	bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5645	Value *KnownLive1,
5646	Value *KnownLive2) {
5647	// If Val is either of the known-live values, we know it is live!
5648	if (Val == nullptr \|\| Val == KnownLive1 \|\| Val == KnownLive2)
5649	return true;
5650
5651	// All values other than instructions and arguments (e.g. constants) are live.
5652	if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5653	return true;
5654
5655	// If Val is a constant sized alloca in the entry block, it is live, this is
5656	// true because it is just a reference to the stack/frame pointer, which is
5657	// live for the whole function.
5658	if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5659	if (AI->isStaticAlloca())
5660	return true;
5661
5662	// Check to see if this value is already used in the memory instruction's
5663	// block. If so, it's already live into the block at the very least, so we
5664	// can reasonably fold it.
5665	return Val->isUsedInBasicBlock(BB: MemoryInst->getParent());
5666	}
5667
5668	/// It is possible for the addressing mode of the machine to fold the specified
5669	/// instruction into a load or store that ultimately uses it.
5670	/// However, the specified instruction has multiple uses.
5671	/// Given this, it may actually increase register pressure to fold it
5672	/// into the load. For example, consider this code:
5673	///
5674	/// X = ...
5675	/// Y = X+1
5676	/// use(Y) -> nonload/store
5677	/// Z = Y+1
5678	/// load Z
5679	///
5680	/// In this case, Y has multiple uses, and can be folded into the load of Z
5681	/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5682	/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5683	/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5684	/// number of computations either.
5685	///
5686	/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5687	/// X was live across 'load Z' for other reasons, we actually would* want to*
5688	/// fold the addressing mode in the Z case. This would make Y die earlier.
5689	bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5690	Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5691	if (IgnoreProfitability)
5692	return true;
5693
5694	// AMBefore is the addressing mode before this instruction was folded into it,
5695	// and AMAfter is the addressing mode after the instruction was folded. Get
5696	// the set of registers referenced by AMAfter and subtract out those
5697	// referenced by AMBefore: this is the set of values which folding in this
5698	// address extends the lifetime of.
5699	//
5700	// Note that there are only two potential values being referenced here,
5701	// BaseReg and ScaleReg (global addresses are always available, as are any
5702	// folded immediates).
5703	Value BaseReg = AMAfter.BaseReg, ScaledReg = AMAfter.ScaledReg;
5704
5705	// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5706	// lifetime wasn't extended by adding this instruction.
5707	if (valueAlreadyLiveAtInst(Val: BaseReg, KnownLive1: AMBefore.BaseReg, KnownLive2: AMBefore.ScaledReg))
5708	BaseReg = nullptr;
5709	if (valueAlreadyLiveAtInst(Val: ScaledReg, KnownLive1: AMBefore.BaseReg, KnownLive2: AMBefore.ScaledReg))
5710	ScaledReg = nullptr;
5711
5712	// If folding this instruction (and it's subexprs) didn't extend any live
5713	// ranges, we're ok with it.
5714	if (!BaseReg && !ScaledReg)
5715	return true;
5716
5717	// If all uses of this instruction can have the address mode sunk into them,
5718	// we can remove the addressing mode and effectively trade one live register
5719	// for another (at worst.) In this context, folding an addressing mode into
5720	// the use is just a particularly nice way of sinking it.
5721	SmallVector<std::pair<Use , Type >, `16`> MemoryUses;
5722	if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5723	return false; // Has a non-memory, non-foldable use!
5724
5725	// Now that we know that all uses of this instruction are part of a chain of
5726	// computation involving only operations that could theoretically be folded
5727	// into a memory use, loop over each of these memory operation uses and see
5728	// if they could actually* fold the instruction. The assumption is that*
5729	// addressing modes are cheap and that duplicating the computation involved
5730	// many times is worthwhile, even on a fastpath. For sinking candidates
5731	// (i.e. cold call sites), this serves as a way to prevent excessive code
5732	// growth since most architectures have some reasonable small and fast way to
5733	// compute an effective address. (i.e LEA on x86)
5734	SmallVector<Instruction *, `32`> MatchedAddrModeInsts;
5735	for (const std::pair<Use , Type > &Pair : MemoryUses) {
5736	Value *Address = Pair.first->get();
5737	Instruction *UserI = cast<Instruction>(Val: Pair.first->getUser());
5738	Type *AddressAccessTy = Pair.second;
5739	unsigned AS = Address->getType()->getPointerAddressSpace();
5740
5741	// Do a match against the root of this address, ignoring profitability. This
5742	// will tell us if the addressing mode for the memory operation will
5743	// actually* cover the shared instruction.*
5744	ExtAddrMode Result;
5745	std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5746	`0`);
5747	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5748	TPT.getRestorationPoint();
5749	AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5750	AddressAccessTy, AS, UserI, Result,
5751	InsertedInsts, PromotedInsts, TPT,
5752	LargeOffsetGEP, OptSize, PSI, BFI);
5753	Matcher.IgnoreProfitability = true;
5754	bool Success = Matcher.matchAddr(Addr: Address, Depth: `0`);
5755	(void)Success;
5756	assert(Success && "Couldn't select anything?");
5757
5758	// The match was to check the profitability, the changes made are not
5759	// part of the original matcher. Therefore, they should be dropped
5760	// otherwise the original matcher will not present the right state.
5761	TPT.rollback(Point: LastKnownGood);
5762
5763	// If the match didn't cover I, then it won't be shared by it.
5764	if (!is_contained(Range&: MatchedAddrModeInsts, Element: I))
5765	return false;
5766
5767	MatchedAddrModeInsts.clear();
5768	}
5769
5770	return true;
5771	}
5772
5773	/// Return true if the specified values are defined in a
5774	/// different basic block than BB.
5775	static bool IsNonLocalValue(Value V, BasicBlock BB) {
5776	if (Instruction *I = dyn_cast<Instruction>(Val: V))
5777	return I->getParent() != BB;
5778	return false;
5779	}
5780
5781	// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5782	// is the first instruction that will use Addr. So we need to find the first
5783	// user of Addr in current BB.
5784	static BasicBlock::iterator findInsertPos(Value Addr, Instruction MemoryInst,
5785	Value *SunkAddr) {
5786	if (Addr->hasOneUse())
5787	return MemoryInst->getIterator();
5788
5789	// We already have a SunkAddr in current BB, but we may need to insert cast
5790	// instruction after it.
5791	if (SunkAddr) {
5792	if (Instruction *AddrInst = dyn_cast<Instruction>(Val: SunkAddr))
5793	return std::next(x: AddrInst->getIterator());
5794	}
5795
5796	// Find the first user of Addr in current BB.
5797	Instruction *Earliest = MemoryInst;
5798	for (User *U : Addr->users()) {
5799	Instruction *UserInst = dyn_cast<Instruction>(Val: U);
5800	if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5801	if (isa<PHINode>(Val: UserInst) \|\| UserInst->isDebugOrPseudoInst())
5802	continue;
5803	if (UserInst->comesBefore(Other: Earliest))
5804	Earliest = UserInst;
5805	}
5806	}
5807	return Earliest->getIterator();
5808	}
5809
5810	/// Sink addressing mode computation immediate before MemoryInst if doing so
5811	/// can be done without increasing register pressure. The need for the
5812	/// register pressure constraint means this can end up being an all or nothing
5813	/// decision for all uses of the same addressing computation.
5814	///
5815	/// Load and Store Instructions often have addressing modes that can do
5816	/// significant amounts of computation. As such, instruction selection will try
5817	/// to get the load or store to do as much computation as possible for the
5818	/// program. The problem is that isel can only see within a single block. As
5819	/// such, we sink as much legal addressing mode work into the block as possible.
5820	///
5821	/// This method is used to optimize both load/store and inline asms with memory
5822	/// operands. It's also used to sink addressing computations feeding into cold
5823	/// call sites into their (cold) basic block.
5824	///
5825	/// The motivation for handling sinking into cold blocks is that doing so can
5826	/// both enable other address mode sinking (by satisfying the register pressure
5827	/// constraint above), and reduce register pressure globally (by removing the
5828	/// addressing mode computation from the fast path entirely.).
5829	bool CodeGenPrepare::optimizeMemoryInst(Instruction MemoryInst, Value Addr,
5830	Type AccessTy, unsigned* AddrSpace) {
5831	Value *Repl = Addr;
5832
5833	// Try to collapse single-value PHI nodes. This is necessary to undo
5834	// unprofitable PRE transformations.
5835	SmallVector<Value *, `8`> worklist;
5836	SmallPtrSet<Value *, `16`> Visited;
5837	worklist.push_back(Elt: Addr);
5838
5839	// Use a worklist to iteratively look through PHI and select nodes, and
5840	// ensure that the addressing mode obtained from the non-PHI/select roots of
5841	// the graph are compatible.
5842	bool PhiOrSelectSeen = false;
5843	SmallVector<Instruction *, `16`> AddrModeInsts;
5844	const SimplifyQuery SQ(*DL, TLInfo);
5845	AddressingModeCombiner AddrModes(SQ, Addr);
5846	TypePromotionTransaction TPT(RemovedInsts);
5847	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5848	TPT.getRestorationPoint();
5849	while (!worklist.empty()) {
5850	Value *V = worklist.pop_back_val();
5851
5852	// We allow traversing cyclic Phi nodes.
5853	// In case of success after this loop we ensure that traversing through
5854	// Phi nodes ends up with all cases to compute address of the form
5855	// BaseGV + Base + Scale Index + Offset*
5856	// where Scale and Offset are constans and BaseGV, Base and Index
5857	// are exactly the same Values in all cases.
5858	// It means that BaseGV, Scale and Offset dominate our memory instruction
5859	// and have the same value as they had in address computation represented
5860	// as Phi. So we can safely sink address computation to memory instruction.
5861	if (!Visited.insert(Ptr: V).second)
5862	continue;
5863
5864	// For a PHI node, push all of its incoming values.
5865	if (PHINode *P = dyn_cast<PHINode>(Val: V)) {
5866	append_range(C&: worklist, R: P->incoming_values());
5867	PhiOrSelectSeen = true;
5868	continue;
5869	}
5870	// Similar for select.
5871	if (SelectInst *SI = dyn_cast<SelectInst>(Val: V)) {
5872	worklist.push_back(Elt: SI->getFalseValue());
5873	worklist.push_back(Elt: SI->getTrueValue());
5874	PhiOrSelectSeen = true;
5875	continue;
5876	}
5877
5878	// For non-PHIs, determine the addressing mode being computed. Note that
5879	// the result may differ depending on what other uses our candidate
5880	// addressing instructions might have.
5881	AddrModeInsts.clear();
5882	std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5883	`0`);
5884	// Defer the query (and possible computation of) the dom tree to point of
5885	// actual use. It's expected that most address matches don't actually need
5886	// the domtree.
5887	auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5888	Function *F = MemoryInst->getParent()->getParent();
5889	return this->getDT(F&: *F);
5890	};
5891	ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5892	V, AccessTy, AS: AddrSpace, MemoryInst, AddrModeInsts, TLI: TLI, LI: LI, getDTFn,
5893	TRI: *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5894	BFI: BFI.get());
5895
5896	GetElementPtrInst *GEP = LargeOffsetGEP.first;
5897	if (GEP && !NewGEPBases.count(V: GEP)) {
5898	// If splitting the underlying data structure can reduce the offset of a
5899	// GEP, collect the GEP. Skip the GEPs that are the new bases of
5900	// previously split data structures.
5901	LargeOffsetGEPMap [GEP->getPointerOperand()].push_back(Elt: LargeOffsetGEP);
5902	LargeOffsetGEPID.insert(KV: std::make_pair(x&: GEP, y: LargeOffsetGEPID.size()));
5903	}
5904
5905	NewAddrMode.OriginalValue = V;
5906	if (!AddrModes.addNewAddrMode(NewAddrMode))
5907	break;
5908	}
5909
5910	// Try to combine the AddrModes we've collected. If we couldn't collect any,
5911	// or we have multiple but either couldn't combine them or combining them
5912	// wouldn't do anything useful, bail out now.
5913	if (!AddrModes.combineAddrModes()) {
5914	TPT.rollback(Point: LastKnownGood);
5915	return false;
5916	}
5917	bool Modified = TPT.commit();
5918
5919	// Get the combined AddrMode (or the only AddrMode, if we only had one).
5920	ExtAddrMode AddrMode = AddrModes.getAddrMode();
5921
5922	// If all the instructions matched are already in this BB, don't do anything.
5923	// If we saw a Phi node then it is not local definitely, and if we saw a
5924	// select then we want to push the address calculation past it even if it's
5925	// already in this BB.
5926	if (!PhiOrSelectSeen && none_of(Range&: AddrModeInsts, P: [&](Value *V) {
5927	return IsNonLocalValue(V, BB: MemoryInst->getParent());
5928	})) {
5929	LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5930	<< "\n");
5931	return Modified;
5932	}
5933
5934	// Now that we determined the addressing expression we want to use and know
5935	// that we have to sink it into this block. Check to see if we have already
5936	// done this for some other load/store instr in this block. If so, reuse
5937	// the computation. Before attempting reuse, check if the address is valid
5938	// as it may have been erased.
5939
5940	WeakTrackingVH SunkAddrVH = SunkAddrs [Addr];
5941
5942	Value SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr*;
5943	Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5944
5945	// The current BB may be optimized multiple times, we can't guarantee the
5946	// reuse of Addr happens later, call findInsertPos to find an appropriate
5947	// insert position.
5948	auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5949
5950	// TODO: Adjust insert point considering (Base\|Scaled)Reg if possible.
5951	if (!SunkAddr) {
5952	auto &DT = getDT(F&: *MemoryInst->getFunction());
5953	if ((AddrMode.BaseReg && !DT.dominates(Def: AddrMode.BaseReg, User: &*InsertPos)) \|\|
5954	(AddrMode.ScaledReg && !DT.dominates(Def: AddrMode.ScaledReg, User: &*InsertPos)))
5955	return Modified;
5956	}
5957
5958	IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5959
5960	if (SunkAddr) {
5961	LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5962	<< " for " << *MemoryInst << "\n");
5963	if (SunkAddr->getType() != Addr->getType()) {
5964	if (SunkAddr->getType()->getPointerAddressSpace() !=
5965	Addr->getType()->getPointerAddressSpace() &&
5966	!DL->isNonIntegralPointerType(Ty: Addr->getType())) {
5967	// There are two reasons the address spaces might not match: a no-op
5968	// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5969	// ptrtoint/inttoptr pair to ensure we match the original semantics.
5970	// TODO: allow bitcast between different address space pointers with the
5971	// same size.
5972	SunkAddr = Builder.CreatePtrToInt(V: SunkAddr, DestTy: IntPtrTy, Name: "sunkaddr");
5973	SunkAddr =
5974	Builder.CreateIntToPtr(V: SunkAddr, DestTy: Addr->getType(), Name: "sunkaddr");
5975	} else
5976	SunkAddr = Builder.CreatePointerCast(V: SunkAddr, DestTy: Addr->getType());
5977	}
5978	} else if (AddrSinkUsingGEPs \|\| (!AddrSinkUsingGEPs.getNumOccurrences() &&
5979	SubtargetInfo->addrSinkUsingGEPs())) {
5980	// By default, we use the GEP-based method when AA is used later. This
5981	// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5982	LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5983	<< " for " << *MemoryInst << "\n");
5984	Value ResultPtr = nullptr, ResultIndex = nullptr;
5985
5986	// First, find the pointer.
5987	if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5988	ResultPtr = AddrMode.BaseReg;
5989	AddrMode.BaseReg = nullptr;
5990	}
5991
5992	if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5993	// We can't add more than one pointer together, nor can we scale a
5994	// pointer (both of which seem meaningless).
5995	if (ResultPtr \|\| AddrMode.Scale != `1`)
5996	return Modified;
5997
5998	ResultPtr = AddrMode.ScaledReg;
5999	AddrMode.Scale = `0`;
6000	}
6001
6002	// It is only safe to sign extend the BaseReg if we know that the math
6003	// required to create it did not overflow before we extend it. Since
6004	// the original IR value was tossed in favor of a constant back when
6005	// the AddrMode was created we need to bail out gracefully if widths
6006	// do not match instead of extending it.
6007	//
6008	// (See below for code to add the scale.)
6009	if (AddrMode.Scale) {
6010	Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6011	if (cast<IntegerType>(Val: IntPtrTy)->getBitWidth() >
6012	cast<IntegerType>(Val: ScaledRegTy)->getBitWidth())
6013	return Modified;
6014	}
6015
6016	GlobalValue *BaseGV = AddrMode.BaseGV;
6017	if (BaseGV != nullptr) {
6018	if (ResultPtr)
6019	return Modified;
6020
6021	if (BaseGV->isThreadLocal()) {
6022	ResultPtr = Builder.CreateThreadLocalAddress(Ptr: BaseGV);
6023	} else {
6024	ResultPtr = BaseGV;
6025	}
6026	}
6027
6028	// If the real base value actually came from an inttoptr, then the matcher
6029	// will look through it and provide only the integer value. In that case,
6030	// use it here.
6031	if (!DL->isNonIntegralPointerType(Ty: Addr->getType())) {
6032	if (!ResultPtr && AddrMode.BaseReg) {
6033	ResultPtr = Builder.CreateIntToPtr(V: AddrMode.BaseReg, DestTy: Addr->getType(),
6034	Name: "sunkaddr");
6035	AddrMode.BaseReg = nullptr;
6036	} else if (!ResultPtr && AddrMode.Scale == `1`) {
6037	ResultPtr = Builder.CreateIntToPtr(V: AddrMode.ScaledReg, DestTy: Addr->getType(),
6038	Name: "sunkaddr");
6039	AddrMode.Scale = `0`;
6040	}
6041	}
6042
6043	if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6044	!AddrMode.BaseOffs) {
6045	SunkAddr = Constant::getNullValue(Ty: Addr->getType());
6046	} else if (!ResultPtr) {
6047	return Modified;
6048	} else {
6049	Type *I8PtrTy =
6050	Builder.getPtrTy(AddrSpace: Addr->getType()->getPointerAddressSpace());
6051
6052	// Start with the base register. Do this first so that subsequent address
6053	// matching finds it last, which will prevent it from trying to match it
6054	// as the scaled value in case it happens to be a mul. That would be
6055	// problematic if we've sunk a different mul for the scale, because then
6056	// we'd end up sinking both muls.
6057	if (AddrMode.BaseReg) {
6058	Value *V = AddrMode.BaseReg;
6059	if (V->getType() != IntPtrTy)
6060	V = Builder.CreateIntCast(V, DestTy: IntPtrTy, /isSigned=/true, Name: "sunkaddr");
6061
6062	ResultIndex = V;
6063	}
6064
6065	// Add the scale value.
6066	if (AddrMode.Scale) {
6067	Value *V = AddrMode.ScaledReg;
6068	if (V->getType() == IntPtrTy) {
6069	// done.
6070	} else {
6071	assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6072	cast<IntegerType>(V->getType())->getBitWidth() &&
6073	"We can't transform if ScaledReg is too narrow");
6074	V = Builder.CreateTrunc(V, DestTy: IntPtrTy, Name: "sunkaddr");
6075	}
6076
6077	if (AddrMode.Scale != `1`)
6078	V = Builder.CreateMul(LHS: V, RHS: ConstantInt::get(Ty: IntPtrTy, V: AddrMode.Scale),
6079	Name: "sunkaddr");
6080	if (ResultIndex)
6081	ResultIndex = Builder.CreateAdd(LHS: ResultIndex, RHS: V, Name: "sunkaddr");
6082	else
6083	ResultIndex = V;
6084	}
6085
6086	// Add in the Base Offset if present.
6087	if (AddrMode.BaseOffs) {
6088	Value *V = ConstantInt::get(Ty: IntPtrTy, V: AddrMode.BaseOffs);
6089	if (ResultIndex) {
6090	// We need to add this separately from the scale above to help with
6091	// SDAG consecutive load/store merging.
6092	if (ResultPtr->getType() != I8PtrTy)
6093	ResultPtr = Builder.CreatePointerCast(V: ResultPtr, DestTy: I8PtrTy);
6094	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset: ResultIndex, Name: "sunkaddr",
6095	NW: AddrMode.InBounds);
6096	}
6097
6098	ResultIndex = V;
6099	}
6100
6101	if (!ResultIndex) {
6102	SunkAddr = ResultPtr;
6103	} else {
6104	if (ResultPtr->getType() != I8PtrTy)
6105	ResultPtr = Builder.CreatePointerCast(V: ResultPtr, DestTy: I8PtrTy);
6106	SunkAddr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset: ResultIndex, Name: "sunkaddr",
6107	NW: AddrMode.InBounds);
6108	}
6109
6110	if (SunkAddr->getType() != Addr->getType()) {
6111	if (SunkAddr->getType()->getPointerAddressSpace() !=
6112	Addr->getType()->getPointerAddressSpace() &&
6113	!DL->isNonIntegralPointerType(Ty: Addr->getType())) {
6114	// There are two reasons the address spaces might not match: a no-op
6115	// addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6116	// ptrtoint/inttoptr pair to ensure we match the original semantics.
6117	// TODO: allow bitcast between different address space pointers with
6118	// the same size.
6119	SunkAddr = Builder.CreatePtrToInt(V: SunkAddr, DestTy: IntPtrTy, Name: "sunkaddr");
6120	SunkAddr =
6121	Builder.CreateIntToPtr(V: SunkAddr, DestTy: Addr->getType(), Name: "sunkaddr");
6122	} else
6123	SunkAddr = Builder.CreatePointerCast(V: SunkAddr, DestTy: Addr->getType());
6124	}
6125	}
6126	} else {
6127	// We'd require a ptrtoint/inttoptr down the line, which we can't do for
6128	// non-integral pointers, so in that case bail out now.
6129	Type BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr*;
6130	Type ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr*;
6131	PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(Val: BaseTy);
6132	PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(Val: ScaleTy);
6133	if (DL->isNonIntegralPointerType(Ty: Addr->getType()) \|\|
6134	(BasePtrTy && DL->isNonIntegralPointerType(PT: BasePtrTy)) \|\|
6135	(ScalePtrTy && DL->isNonIntegralPointerType(PT: ScalePtrTy)) \|\|
6136	(AddrMode.BaseGV &&
6137	DL->isNonIntegralPointerType(PT: AddrMode.BaseGV->getType())))
6138	return Modified;
6139
6140	LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6141	<< " for " << *MemoryInst << "\n");
6142	Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6143	Value Result = nullptr*;
6144
6145	// Start with the base register. Do this first so that subsequent address
6146	// matching finds it last, which will prevent it from trying to match it
6147	// as the scaled value in case it happens to be a mul. That would be
6148	// problematic if we've sunk a different mul for the scale, because then
6149	// we'd end up sinking both muls.
6150	if (AddrMode.BaseReg) {
6151	Value *V = AddrMode.BaseReg;
6152	if (V->getType()->isPointerTy())
6153	V = Builder.CreatePtrToInt(V, DestTy: IntPtrTy, Name: "sunkaddr");
6154	if (V->getType() != IntPtrTy)
6155	V = Builder.CreateIntCast(V, DestTy: IntPtrTy, /isSigned=/true, Name: "sunkaddr");
6156	Result = V;
6157	}
6158
6159	// Add the scale value.
6160	if (AddrMode.Scale) {
6161	Value *V = AddrMode.ScaledReg;
6162	if (V->getType() == IntPtrTy) {
6163	// done.
6164	} else if (V->getType()->isPointerTy()) {
6165	V = Builder.CreatePtrToInt(V, DestTy: IntPtrTy, Name: "sunkaddr");
6166	} else if (cast<IntegerType>(Val: IntPtrTy)->getBitWidth() <
6167	cast<IntegerType>(Val: V->getType())->getBitWidth()) {
6168	V = Builder.CreateTrunc(V, DestTy: IntPtrTy, Name: "sunkaddr");
6169	} else {
6170	// It is only safe to sign extend the BaseReg if we know that the math
6171	// required to create it did not overflow before we extend it. Since
6172	// the original IR value was tossed in favor of a constant back when
6173	// the AddrMode was created we need to bail out gracefully if widths
6174	// do not match instead of extending it.
6175	Instruction *I = dyn_cast_or_null<Instruction>(Val: Result);
6176	if (I && (Result != AddrMode.BaseReg))
6177	I->eraseFromParent();
6178	return Modified;
6179	}
6180	if (AddrMode.Scale != `1`)
6181	V = Builder.CreateMul(LHS: V, RHS: ConstantInt::get(Ty: IntPtrTy, V: AddrMode.Scale),
6182	Name: "sunkaddr");
6183	if (Result)
6184	Result = Builder.CreateAdd(LHS: Result, RHS: V, Name: "sunkaddr");
6185	else
6186	Result = V;
6187	}
6188
6189	// Add in the BaseGV if present.
6190	GlobalValue *BaseGV = AddrMode.BaseGV;
6191	if (BaseGV != nullptr) {
6192	Value *BaseGVPtr;
6193	if (BaseGV->isThreadLocal()) {
6194	BaseGVPtr = Builder.CreateThreadLocalAddress(Ptr: BaseGV);
6195	} else {
6196	BaseGVPtr = BaseGV;
6197	}
6198	Value *V = Builder.CreatePtrToInt(V: BaseGVPtr, DestTy: IntPtrTy, Name: "sunkaddr");
6199	if (Result)
6200	Result = Builder.CreateAdd(LHS: Result, RHS: V, Name: "sunkaddr");
6201	else
6202	Result = V;
6203	}
6204
6205	// Add in the Base Offset if present.
6206	if (AddrMode.BaseOffs) {
6207	Value *V = ConstantInt::get(Ty: IntPtrTy, V: AddrMode.BaseOffs);
6208	if (Result)
6209	Result = Builder.CreateAdd(LHS: Result, RHS: V, Name: "sunkaddr");
6210	else
6211	Result = V;
6212	}
6213
6214	if (!Result)
6215	SunkAddr = Constant::getNullValue(Ty: Addr->getType());
6216	else
6217	SunkAddr = Builder.CreateIntToPtr(V: Result, DestTy: Addr->getType(), Name: "sunkaddr");
6218	}
6219
6220	MemoryInst->replaceUsesOfWith(From: Repl, To: SunkAddr);
6221	// Store the newly computed address into the cache. In the case we reused a
6222	// value, this should be idempotent.
6223	SunkAddrs [Addr] = WeakTrackingVH (SunkAddr);
6224
6225	// If we have no uses, recursively delete the value and all dead instructions
6226	// using it.
6227	if (Repl->use_empty()) {
6228	resetIteratorIfInvalidatedWhileCalling(BB: CurInstIterator ->getParent(), f: [&]() {
6229	RecursivelyDeleteTriviallyDeadInstructions(
6230	V: Repl, TLI: TLInfo, MSSAU: nullptr,
6231	AboutToDeleteCallback: [&](Value *V) { removeAllAssertingVHReferences(V); });
6232	});
6233	}
6234	++NumMemoryInsts;
6235	return true;
6236	}
6237
6238	/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6239	/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6240	/// only handle a 2 operand GEP in the same basic block or a splat constant
6241	/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6242	/// index.
6243	///
6244	/// If the existing GEP has a vector base pointer that is splat, we can look
6245	/// through the splat to find the scalar pointer. If we can't find a scalar
6246	/// pointer there's nothing we can do.
6247	///
6248	/// If we have a GEP with more than 2 indices where the middle indices are all
6249	/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6250	///
6251	/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6252	/// followed by a GEP with an all zeroes vector index. This will enable
6253	/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6254	/// zero index.
6255	bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6256	Value *Ptr) {
6257	Value *NewAddr;
6258
6259	if (const auto *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr)) {
6260	// Don't optimize GEPs that don't have indices.
6261	if (!GEP->hasIndices())
6262	return false;
6263
6264	// If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6265	// FIXME: We should support this by sinking the GEP.
6266	if (MemoryInst->getParent() != GEP->getParent())
6267	return false;
6268
6269	SmallVector<Value *, `2`> Ops(GEP->operands());
6270
6271	bool RewriteGEP = false;
6272
6273	if (Ops [`0`]->getType()->isVectorTy()) {
6274	Ops [`0`] = getSplatValue(V: Ops [`0`]);
6275	if (!Ops [`0`])
6276	return false;
6277	RewriteGEP = true;
6278	}
6279
6280	unsigned FinalIndex = Ops.size() - `1`;
6281
6282	// Ensure all but the last index is 0.
6283	// FIXME: This isn't strictly required. All that's required is that they are
6284	// all scalars or splats.
6285	for (unsigned i = `1`; i < FinalIndex; ++i) {
6286	auto *C = dyn_cast<Constant>(Val: Ops [i]);
6287	if (!C)
6288	return false;
6289	if (isa<VectorType>(Val: C->getType()))
6290	C = C->getSplatValue();
6291	auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
6292	if (!CI \|\| !CI->isZero())
6293	return false;
6294	// Scalarize the index if needed.
6295	Ops [i] = CI;
6296	}
6297
6298	// Try to scalarize the final index.
6299	if (Ops [FinalIndex]->getType()->isVectorTy()) {
6300	if (Value *V = getSplatValue(V: Ops [FinalIndex])) {
6301	auto *C = dyn_cast<ConstantInt>(Val: V);
6302	// Don't scalarize all zeros vector.
6303	if (!C \|\| !C->isZero()) {
6304	Ops [FinalIndex] = V;
6305	RewriteGEP = true;
6306	}
6307	}
6308	}
6309
6310	// If we made any changes or the we have extra operands, we need to generate
6311	// new instructions.
6312	if (!RewriteGEP && Ops.size() == `2`)
6313	return false;
6314
6315	auto NumElts = cast<VectorType>(Val: Ptr->getType())->getElementCount();
6316
6317	IRBuilder<> Builder(MemoryInst);
6318
6319	Type *SourceTy = GEP->getSourceElementType();
6320	Type *ScalarIndexTy = DL->getIndexType(PtrTy: Ops [`0`]->getType()->getScalarType());
6321
6322	// If the final index isn't a vector, emit a scalar GEP containing all ops
6323	// and a vector GEP with all zeroes final index.
6324	if (!Ops [FinalIndex]->getType()->isVectorTy()) {
6325	NewAddr = Builder.CreateGEP(Ty: SourceTy, Ptr: Ops [`0`], IdxList: ArrayRef(Ops).drop_front());
6326	auto *IndexTy = VectorType::get(ElementType: ScalarIndexTy, EC: NumElts);
6327	auto *SecondTy = GetElementPtrInst::getIndexedType(
6328	Ty: SourceTy, IdxList: ArrayRef(Ops).drop_front());
6329	NewAddr =
6330	Builder.CreateGEP(Ty: SecondTy, Ptr: NewAddr, IdxList: Constant::getNullValue(Ty: IndexTy));
6331	} else {
6332	Value *Base = Ops [`0`];
6333	Value *Index = Ops [FinalIndex];
6334
6335	// Create a scalar GEP if there are more than 2 operands.
6336	if (Ops.size() != `2`) {
6337	// Replace the last index with 0.
6338	Ops [FinalIndex] =
6339	Constant::getNullValue(Ty: Ops [FinalIndex]->getType()->getScalarType());
6340	Base = Builder.CreateGEP(Ty: SourceTy, Ptr: Base, IdxList: ArrayRef(Ops).drop_front());
6341	SourceTy = GetElementPtrInst::getIndexedType(
6342	Ty: SourceTy, IdxList: ArrayRef(Ops).drop_front());
6343	}
6344
6345	// Now create the GEP with scalar pointer and vector index.
6346	NewAddr = Builder.CreateGEP(Ty: SourceTy, Ptr: Base, IdxList: Index);
6347	}
6348	} else if (!isa<Constant>(Val: Ptr)) {
6349	// Not a GEP, maybe its a splat and we can create a GEP to enable
6350	// SelectionDAGBuilder to use it as a uniform base.
6351	Value *V = getSplatValue(V: Ptr);
6352	if (!V)
6353	return false;
6354
6355	auto NumElts = cast<VectorType>(Val: Ptr->getType())->getElementCount();
6356
6357	IRBuilder<> Builder(MemoryInst);
6358
6359	// Emit a vector GEP with a scalar pointer and all 0s vector index.
6360	Type *ScalarIndexTy = DL->getIndexType(PtrTy: V->getType()->getScalarType());
6361	auto *IndexTy = VectorType::get(ElementType: ScalarIndexTy, EC: NumElts);
6362	Type *ScalarTy;
6363	if (cast<IntrinsicInst>(Val: MemoryInst)->getIntrinsicID() ==
6364	Intrinsic::masked_gather) {
6365	ScalarTy = MemoryInst->getType()->getScalarType();
6366	} else {
6367	assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6368	Intrinsic::masked_scatter);
6369	ScalarTy = MemoryInst->getOperand(i: `0`)->getType()->getScalarType();
6370	}
6371	NewAddr = Builder.CreateGEP(Ty: ScalarTy, Ptr: V, IdxList: Constant::getNullValue(Ty: IndexTy));
6372	} else {
6373	// Constant, SelectionDAGBuilder knows to check if its a splat.
6374	return false;
6375	}
6376
6377	MemoryInst->replaceUsesOfWith(From: Ptr, To: NewAddr);
6378
6379	// If we have no uses, recursively delete the value and all dead instructions
6380	// using it.
6381	if (Ptr->use_empty())
6382	RecursivelyDeleteTriviallyDeadInstructions(
6383	V: Ptr, TLI: TLInfo, MSSAU: nullptr,
6384	AboutToDeleteCallback: [&](Value *V) { removeAllAssertingVHReferences(V); });
6385
6386	return true;
6387	}
6388
6389	/// If there are any memory operands, use OptimizeMemoryInst to sink their
6390	/// address computing into the block when possible / profitable.
6391	bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6392	bool MadeChange = false;
6393
6394	const TargetRegisterInfo *TRI =
6395	TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6396	TargetLowering::AsmOperandInfoVector TargetConstraints =
6397	TLI->ParseConstraints(DL: DL, TRI, Call: CS);
6398	unsigned ArgNo = `0`;
6399	for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6400	// Compute the constraint code and ConstraintType to use.
6401	TLI->ComputeConstraintToUse(OpInfo, Op: SDValue ());
6402
6403	// TODO: Also handle C_Address?
6404	if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6405	OpInfo.isIndirect) {
6406	Value *OpVal = CS->getArgOperand(i: ArgNo++);
6407	MadeChange \|= optimizeMemoryInst(MemoryInst: CS, Addr: OpVal, AccessTy: OpVal->getType(), AddrSpace: ~`0u`);
6408	} else if (OpInfo.Type == InlineAsm::isInput)
6409	ArgNo++;
6410	}
6411
6412	return MadeChange;
6413	}
6414
6415	/// Check if all the uses of \p Val are equivalent (or free) zero or
6416	/// sign extensions.
6417	static bool hasSameExtUse(Value Val, const* TargetLowering &TLI) {
6418	assert(!Val->use_empty() && "Input must have at least one use");
6419	const Instruction FirstUser = cast<Instruction>(Val: Val->user_begin());
6420	bool IsSExt = isa<SExtInst>(Val: FirstUser);
6421	Type *ExtTy = FirstUser->getType();
6422	for (const User *U : Val->users()) {
6423	const Instruction *UI = cast<Instruction>(Val: U);
6424	if ((IsSExt && !isa<SExtInst>(Val: UI)) \|\| (!IsSExt && !isa<ZExtInst>(Val: UI)))
6425	return false;
6426	Type *CurTy = UI->getType();
6427	// Same input and output types: Same instruction after CSE.
6428	if (CurTy == ExtTy)
6429	continue;
6430
6431	// If IsSExt is true, we are in this situation:
6432	// a = Val
6433	// b = sext ty1 a to ty2
6434	// c = sext ty1 a to ty3
6435	// Assuming ty2 is shorter than ty3, this could be turned into:
6436	// a = Val
6437	// b = sext ty1 a to ty2
6438	// c = sext ty2 b to ty3
6439	// However, the last sext is not free.
6440	if (IsSExt)
6441	return false;
6442
6443	// This is a ZExt, maybe this is free to extend from one type to another.
6444	// In that case, we would not account for a different use.
6445	Type *NarrowTy;
6446	Type *LargeTy;
6447	if (ExtTy->getScalarType()->getIntegerBitWidth() >
6448	CurTy->getScalarType()->getIntegerBitWidth()) {
6449	NarrowTy = CurTy;
6450	LargeTy = ExtTy;
6451	} else {
6452	NarrowTy = ExtTy;
6453	LargeTy = CurTy;
6454	}
6455
6456	if (!TLI.isZExtFree(FromTy: NarrowTy, ToTy: LargeTy))
6457	return false;
6458	}
6459	// All uses are the same or can be derived from one another for free.
6460	return true;
6461	}
6462
6463	/// Try to speculatively promote extensions in \p Exts and continue
6464	/// promoting through newly promoted operands recursively as far as doing so is
6465	/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6466	/// When some promotion happened, \p TPT contains the proper state to revert
6467	/// them.
6468	///
6469	/// \return true if some promotion happened, false otherwise.
6470	bool CodeGenPrepare::tryToPromoteExts(
6471	TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6472	SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6473	unsigned CreatedInstsCost) {
6474	bool Promoted = false;
6475
6476	// Iterate over all the extensions to try to promote them.
6477	for (auto *I : Exts) {
6478	// Early check if we directly have ext(load).
6479	if (isa<LoadInst>(Val: I->getOperand(i: `0`))) {
6480	ProfitablyMovedExts.push_back(Elt: I);
6481	continue;
6482	}
6483
6484	// Check whether or not we want to do any promotion. The reason we have
6485	// this check inside the for loop is to catch the case where an extension
6486	// is directly fed by a load because in such case the extension can be moved
6487	// up without any promotion on its operands.
6488	if (!TLI->enableExtLdPromotion() \|\| DisableExtLdPromotion)
6489	return false;
6490
6491	// Get the action to perform the promotion.
6492	TypePromotionHelper::Action TPH =
6493	TypePromotionHelper::getAction(Ext: I, InsertedInsts, TLI: *TLI, PromotedInsts);
6494	// Check if we can promote.
6495	if (!TPH) {
6496	// Save the current extension as we cannot move up through its operand.
6497	ProfitablyMovedExts.push_back(Elt: I);
6498	continue;
6499	}
6500
6501	// Save the current state.
6502	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6503	TPT.getRestorationPoint();
6504	SmallVector<Instruction *, `4`> NewExts;
6505	unsigned NewCreatedInstsCost = `0`;
6506	unsigned ExtCost = !TLI->isExtFree(I);
6507	// Promote.
6508	Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6509	&NewExts, nullptr, *TLI);
6510	assert(PromotedVal &&
6511	"TypePromotionHelper should have filtered out those cases");
6512
6513	// We would be able to merge only one extension in a load.
6514	// Therefore, if we have more than 1 new extension we heuristically
6515	// cut this search path, because it means we degrade the code quality.
6516	// With exactly 2, the transformation is neutral, because we will merge
6517	// one extension but leave one. However, we optimistically keep going,
6518	// because the new extension may be removed too. Also avoid replacing a
6519	// single free extension with multiple extensions, as this increases the
6520	// number of IR instructions while not providing any savings.
6521	long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6522	// FIXME: It would be possible to propagate a negative value instead of
6523	// conservatively ceiling it to 0.
6524	TotalCreatedInstsCost =
6525	std::max(a: (long long)`0`, b: (TotalCreatedInstsCost - ExtCost));
6526	if (!StressExtLdPromotion &&
6527	(TotalCreatedInstsCost > `1` \|\|
6528	!isPromotedInstructionLegal(TLI: TLI, DL: DL, Val: PromotedVal) \|\|
6529	(ExtCost == `0` && NewExts.size() > `1`))) {
6530	// This promotion is not profitable, rollback to the previous state, and
6531	// save the current extension in ProfitablyMovedExts as the latest
6532	// speculative promotion turned out to be unprofitable.
6533	TPT.rollback(Point: LastKnownGood);
6534	ProfitablyMovedExts.push_back(Elt: I);
6535	continue;
6536	}
6537	// Continue promoting NewExts as far as doing so is profitable.
6538	SmallVector<Instruction *, `2`> NewlyMovedExts;
6539	(void)tryToPromoteExts(TPT, Exts: NewExts, ProfitablyMovedExts&: NewlyMovedExts, CreatedInstsCost: TotalCreatedInstsCost);
6540	bool NewPromoted = false;
6541	for (auto *ExtInst : NewlyMovedExts) {
6542	Instruction *MovedExt = cast<Instruction>(Val: ExtInst);
6543	Value *ExtOperand = MovedExt->getOperand(i: `0`);
6544	// If we have reached to a load, we need this extra profitability check
6545	// as it could potentially be merged into an ext(load).
6546	if (isa<LoadInst>(Val: ExtOperand) &&
6547	!(StressExtLdPromotion \|\| NewCreatedInstsCost <= ExtCost \|\|
6548	(ExtOperand->hasOneUse() \|\| hasSameExtUse(Val: ExtOperand, TLI: *TLI))))
6549	continue;
6550
6551	ProfitablyMovedExts.push_back(Elt: MovedExt);
6552	NewPromoted = true;
6553	}
6554
6555	// If none of speculative promotions for NewExts is profitable, rollback
6556	// and save the current extension (I) as the last profitable extension.
6557	if (!NewPromoted) {
6558	TPT.rollback(Point: LastKnownGood);
6559	ProfitablyMovedExts.push_back(Elt: I);
6560	continue;
6561	}
6562	// The promotion is profitable.
6563	Promoted = true;
6564	}
6565	return Promoted;
6566	}
6567
6568	/// Merging redundant sexts when one is dominating the other.
6569	bool CodeGenPrepare::mergeSExts(Function &F) {
6570	bool Changed = false;
6571	for (auto &Entry : ValToSExtendedUses) {
6572	SExts &Insts = Entry.second;
6573	SExts CurPts;
6574	for (Instruction *Inst : Insts) {
6575	if (RemovedInsts.count(Ptr: Inst) \|\| !isa<SExtInst>(Val: Inst) \|\|
6576	Inst->getOperand(i: `0`) != Entry.first)
6577	continue;
6578	bool inserted = false;
6579	for (auto &Pt : CurPts) {
6580	if (getDT(F).dominates(Def: Inst, User: Pt)) {
6581	replaceAllUsesWith(Old: Pt, New: Inst, FreshBBs, IsHuge: IsHugeFunc);
6582	RemovedInsts.insert(Ptr: Pt);
6583	Pt->removeFromParent();
6584	Pt = Inst;
6585	inserted = true;
6586	Changed = true;
6587	break;
6588	}
6589	if (!getDT(F).dominates(Def: Pt, User: Inst))
6590	// Give up if we need to merge in a common dominator as the
6591	// experiments show it is not profitable.
6592	continue;
6593	replaceAllUsesWith(Old: Inst, New: Pt, FreshBBs, IsHuge: IsHugeFunc);
6594	RemovedInsts.insert(Ptr: Inst);
6595	Inst->removeFromParent();
6596	inserted = true;
6597	Changed = true;
6598	break;
6599	}
6600	if (!inserted)
6601	CurPts.push_back(Elt: Inst);
6602	}
6603	}
6604	return Changed;
6605	}
6606
6607	// Splitting large data structures so that the GEPs accessing them can have
6608	// smaller offsets so that they can be sunk to the same blocks as their users.
6609	// For example, a large struct starting from %base is split into two parts
6610	// where the second part starts from %new_base.
6611	//
6612	// Before:
6613	// BB0:
6614	// %base =
6615	//
6616	// BB1:
6617	// %gep0 = gep %base, off0
6618	// %gep1 = gep %base, off1
6619	// %gep2 = gep %base, off2
6620	//
6621	// BB2:
6622	// %load1 = load %gep0
6623	// %load2 = load %gep1
6624	// %load3 = load %gep2
6625	//
6626	// After:
6627	// BB0:
6628	// %base =
6629	// %new_base = gep %base, off0
6630	//
6631	// BB1:
6632	// %new_gep0 = %new_base
6633	// %new_gep1 = gep %new_base, off1 - off0
6634	// %new_gep2 = gep %new_base, off2 - off0
6635	//
6636	// BB2:
6637	// %load1 = load i32, i32 %new_gep0*
6638	// %load2 = load i32, i32 %new_gep1*
6639	// %load3 = load i32, i32 %new_gep2*
6640	//
6641	// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6642	// their offsets are smaller enough to fit into the addressing mode.
6643	bool CodeGenPrepare::splitLargeGEPOffsets() {
6644	bool Changed = false;
6645	for (auto &Entry : LargeOffsetGEPMap) {
6646	Value *OldBase = Entry.first;
6647	SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6648	&LargeOffsetGEPs = Entry.second;
6649	auto compareGEPOffset =
6650	[&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6651	const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6652	if (LHS.first == RHS.first)
6653	return false;
6654	if (LHS.second != RHS.second)
6655	return LHS.second < RHS.second;
6656	return LargeOffsetGEPID [LHS.first] < LargeOffsetGEPID [RHS.first];
6657	};
6658	// Sorting all the GEPs of the same data structures based on the offsets.
6659	llvm::sort(C&: LargeOffsetGEPs, Comp: compareGEPOffset);
6660	LargeOffsetGEPs.erase(CS: llvm::unique(R&: LargeOffsetGEPs), CE: LargeOffsetGEPs.end());
6661	// Skip if all the GEPs have the same offsets.
6662	if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6663	continue;
6664	GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6665	int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6666	Value NewBaseGEP = nullptr*;
6667
6668	auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6669	GetElementPtrInst *GEP) {
6670	LLVMContext &Ctx = GEP->getContext();
6671	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
6672	Type *I8PtrTy =
6673	PointerType::get(C&: Ctx, AddressSpace: GEP->getType()->getPointerAddressSpace());
6674
6675	BasicBlock::iterator NewBaseInsertPt;
6676	BasicBlock *NewBaseInsertBB;
6677	if (auto *BaseI = dyn_cast<Instruction>(Val: OldBase)) {
6678	// If the base of the struct is an instruction, the new base will be
6679	// inserted close to it.
6680	NewBaseInsertBB = BaseI->getParent();
6681	if (isa<PHINode>(Val: BaseI))
6682	NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6683	else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Val: BaseI)) {
6684	NewBaseInsertBB =
6685	SplitEdge(From: NewBaseInsertBB, To: Invoke->getNormalDest(), DT: DT.get(), LI);
6686	NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6687	} else
6688	NewBaseInsertPt = std::next(x: BaseI->getIterator());
6689	} else {
6690	// If the current base is an argument or global value, the new base
6691	// will be inserted to the entry block.
6692	NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6693	NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6694	}
6695	IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6696	// Create a new base.
6697	Value *BaseIndex = ConstantInt::get(Ty: PtrIdxTy, V: BaseOffset);
6698	NewBaseGEP = OldBase;
6699	if (NewBaseGEP->getType() != I8PtrTy)
6700	NewBaseGEP = NewBaseBuilder.CreatePointerCast(V: NewBaseGEP, DestTy: I8PtrTy);
6701	NewBaseGEP =
6702	NewBaseBuilder.CreatePtrAdd(Ptr: NewBaseGEP, Offset: BaseIndex, Name: "splitgep");
6703	NewGEPBases.insert(V: NewBaseGEP);
6704	return;
6705	};
6706
6707	// Check whether all the offsets can be encoded with prefered common base.
6708	if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6709	MinOffset: LargeOffsetGEPs.front().second, MaxOffset: LargeOffsetGEPs.back().second)) {
6710	BaseOffset = PreferBase;
6711	// Create a new base if the offset of the BaseGEP can be decoded with one
6712	// instruction.
6713	createNewBase (BaseOffset, OldBase, BaseGEP);
6714	}
6715
6716	auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6717	while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6718	GetElementPtrInst *GEP = LargeOffsetGEP->first;
6719	int64_t Offset = LargeOffsetGEP->second;
6720	if (Offset != BaseOffset) {
6721	TargetLowering::AddrMode AddrMode;
6722	AddrMode.HasBaseReg = true;
6723	AddrMode.BaseOffs = Offset - BaseOffset;
6724	// The result type of the GEP might not be the type of the memory
6725	// access.
6726	if (!TLI->isLegalAddressingMode(DL: *DL, AM: AddrMode,
6727	Ty: GEP->getResultElementType(),
6728	AddrSpace: GEP->getAddressSpace())) {
6729	// We need to create a new base if the offset to the current base is
6730	// too large to fit into the addressing mode. So, a very large struct
6731	// may be split into several parts.
6732	BaseGEP = GEP;
6733	BaseOffset = Offset;
6734	NewBaseGEP = nullptr;
6735	}
6736	}
6737
6738	// Generate a new GEP to replace the current one.
6739	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
6740
6741	if (!NewBaseGEP) {
6742	// Create a new base if we don't have one yet. Find the insertion
6743	// pointer for the new base first.
6744	createNewBase (BaseOffset, OldBase, GEP);
6745	}
6746
6747	IRBuilder<> Builder(GEP);
6748	Value *NewGEP = NewBaseGEP;
6749	if (Offset != BaseOffset) {
6750	// Calculate the new offset for the new GEP.
6751	Value *Index = ConstantInt::get(Ty: PtrIdxTy, V: Offset - BaseOffset);
6752	NewGEP = Builder.CreatePtrAdd(Ptr: NewBaseGEP, Offset: Index);
6753	}
6754	replaceAllUsesWith(Old: GEP, New: NewGEP, FreshBBs, IsHuge: IsHugeFunc);
6755	LargeOffsetGEPID.erase(Val: GEP);
6756	LargeOffsetGEP = LargeOffsetGEPs.erase(CI: LargeOffsetGEP);
6757	GEP->eraseFromParent();
6758	Changed = true;
6759	}
6760	}
6761	return Changed;
6762	}
6763
6764	bool CodeGenPrepare::optimizePhiType(
6765	PHINode I, SmallPtrSetImpl<PHINode > &Visited,
6766	SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6767	// We are looking for a collection on interconnected phi nodes that together
6768	// only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6769	// are of the same type. Convert the whole set of nodes to the type of the
6770	// bitcast.
6771	Type *PhiTy = I->getType();
6772	Type ConvertTy = nullptr*;
6773	if (Visited.count(Ptr: I) \|\|
6774	(!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6775	return false;
6776
6777	SmallVector<Instruction *, `4`> Worklist;
6778	Worklist.push_back(Elt: cast<Instruction>(Val: I));
6779	SmallPtrSet<PHINode *, `4`> PhiNodes;
6780	SmallPtrSet<ConstantData *, `4`> Constants;
6781	PhiNodes.insert(Ptr: I);
6782	Visited.insert(Ptr: I);
6783	SmallPtrSet<Instruction *, `4`> Defs;
6784	SmallPtrSet<Instruction *, `4`> Uses;
6785	// This works by adding extra bitcasts between load/stores and removing
6786	// existing bicasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6787	// we can get in the situation where we remove a bitcast in one iteration
6788	// just to add it again in the next. We need to ensure that at least one
6789	// bitcast we remove are anchored to something that will not change back.
6790	bool AnyAnchored = false;
6791
6792	while (!Worklist.empty()) {
6793	Instruction *II = Worklist.pop_back_val();
6794
6795	if (auto *Phi = dyn_cast<PHINode>(Val: II)) {
6796	// Handle Defs, which might also be PHI's
6797	for (Value *V : Phi->incoming_values()) {
6798	if (auto *OpPhi = dyn_cast<PHINode>(Val: V)) {
6799	if (!PhiNodes.count(Ptr: OpPhi)) {
6800	if (!Visited.insert(Ptr: OpPhi).second)
6801	return false;
6802	PhiNodes.insert(Ptr: OpPhi);
6803	Worklist.push_back(Elt: OpPhi);
6804	}
6805	} else if (auto *OpLoad = dyn_cast<LoadInst>(Val: V)) {
6806	if (!OpLoad->isSimple())
6807	return false;
6808	if (Defs.insert(Ptr: OpLoad).second)
6809	Worklist.push_back(Elt: OpLoad);
6810	} else if (auto *OpEx = dyn_cast<ExtractElementInst>(Val: V)) {
6811	if (Defs.insert(Ptr: OpEx).second)
6812	Worklist.push_back(Elt: OpEx);
6813	} else if (auto *OpBC = dyn_cast<BitCastInst>(Val: V)) {
6814	if (!ConvertTy)
6815	ConvertTy = OpBC->getOperand(i_nocapture: `0`)->getType();
6816	if (OpBC->getOperand(i_nocapture: `0`)->getType() != ConvertTy)
6817	return false;
6818	if (Defs.insert(Ptr: OpBC).second) {
6819	Worklist.push_back(Elt: OpBC);
6820	AnyAnchored \|= !isa<LoadInst>(Val: OpBC->getOperand(i_nocapture: `0`)) &&
6821	!isa<ExtractElementInst>(Val: OpBC->getOperand(i_nocapture: `0`));
6822	}
6823	} else if (auto *OpC = dyn_cast<ConstantData>(Val: V))
6824	Constants.insert(Ptr: OpC);
6825	else
6826	return false;
6827	}
6828	}
6829
6830	// Handle uses which might also be phi's
6831	for (User *V : II->users()) {
6832	if (auto *OpPhi = dyn_cast<PHINode>(Val: V)) {
6833	if (!PhiNodes.count(Ptr: OpPhi)) {
6834	if (Visited.count(Ptr: OpPhi))
6835	return false;
6836	PhiNodes.insert(Ptr: OpPhi);
6837	Visited.insert(Ptr: OpPhi);
6838	Worklist.push_back(Elt: OpPhi);
6839	}
6840	} else if (auto *OpStore = dyn_cast<StoreInst>(Val: V)) {
6841	if (!OpStore->isSimple() \|\| OpStore->getOperand(i_nocapture: `0`) != II)
6842	return false;
6843	Uses.insert(Ptr: OpStore);
6844	} else if (auto *OpBC = dyn_cast<BitCastInst>(Val: V)) {
6845	if (!ConvertTy)
6846	ConvertTy = OpBC->getType();
6847	if (OpBC->getType() != ConvertTy)
6848	return false;
6849	Uses.insert(Ptr: OpBC);
6850	AnyAnchored \|=
6851	any_of(Range: OpBC->users(), P: [](User U) { return* !isa<StoreInst>(Val: U); });
6852	} else {
6853	return false;
6854	}
6855	}
6856	}
6857
6858	if (!ConvertTy \|\| !AnyAnchored \|\|
6859	!TLI->shouldConvertPhiType(From: PhiTy, To: ConvertTy))
6860	return false;
6861
6862	LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6863	<< *ConvertTy << "\n");
6864
6865	// Create all the new phi nodes of the new type, and bitcast any loads to the
6866	// correct type.
6867	ValueToValueMap ValMap;
6868	for (ConstantData *C : Constants)
6869	ValMap [C] = ConstantExpr::getBitCast(C, Ty: ConvertTy);
6870	for (Instruction *D : Defs) {
6871	if (isa<BitCastInst>(Val: D)) {
6872	ValMap [D] = D->getOperand(i: `0`);
6873	DeletedInstrs.insert(Ptr: D);
6874	} else {
6875	BasicBlock::iterator insertPt = std::next(x: D->getIterator());
6876	ValMap [D] = new BitCastInst (D, ConvertTy, D->getName() + ".bc", insertPt);
6877	}
6878	}
6879	for (PHINode *Phi : PhiNodes)
6880	ValMap [Phi] = PHINode::Create(Ty: ConvertTy, NumReservedValues: Phi->getNumIncomingValues(),
6881	NameStr: Phi->getName() + ".tc", InsertBefore: Phi->getIterator());
6882	// Pipe together all the PhiNodes.
6883	for (PHINode *Phi : PhiNodes) {
6884	PHINode *NewPhi = cast<PHINode>(Val: ValMap [Phi]);
6885	for (int i = `0`, e = Phi->getNumIncomingValues(); i < e; i++)
6886	NewPhi->addIncoming(V: ValMap [Phi->getIncomingValue(i)],
6887	BB: Phi->getIncomingBlock(i));
6888	Visited.insert(Ptr: NewPhi);
6889	}
6890	// And finally pipe up the stores and bitcasts
6891	for (Instruction *U : Uses) {
6892	if (isa<BitCastInst>(Val: U)) {
6893	DeletedInstrs.insert(Ptr: U);
6894	replaceAllUsesWith(Old: U, New: ValMap [U->getOperand(i: `0`)], FreshBBs, IsHuge: IsHugeFunc);
6895	} else {
6896	U->setOperand(i: `0`, Val: new BitCastInst (ValMap [U->getOperand(i: `0`)], PhiTy, "bc",
6897	U->getIterator()));
6898	}
6899	}
6900
6901	// Save the removed phis to be deleted later.
6902	DeletedInstrs.insert_range(R&: PhiNodes);
6903	return true;
6904	}
6905
6906	bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6907	if (!OptimizePhiTypes)
6908	return false;
6909
6910	bool Changed = false;
6911	SmallPtrSet<PHINode *, `4`> Visited;
6912	SmallPtrSet<Instruction *, `4`> DeletedInstrs;
6913
6914	// Attempt to optimize all the phis in the functions to the correct type.
6915	for (auto &BB : F)
6916	for (auto &Phi : BB.phis())
6917	Changed \|= optimizePhiType(I: &Phi, Visited, DeletedInstrs);
6918
6919	// Remove any old phi's that have been converted.
6920	for (auto *I : DeletedInstrs) {
6921	replaceAllUsesWith(Old: I, New: PoisonValue::get(T: I->getType()), FreshBBs, IsHuge: IsHugeFunc);
6922	I->eraseFromParent();
6923	}
6924
6925	return Changed;
6926	}
6927
6928	/// Return true, if an ext(load) can be formed from an extension in
6929	/// \p MovedExts.
6930	bool CodeGenPrepare::canFormExtLd(
6931	const SmallVectorImpl<Instruction > &MovedExts, LoadInst &LI,
6932	Instruction &Inst, bool* HasPromoted) {
6933	for (auto *MovedExtInst : MovedExts) {
6934	if (isa<LoadInst>(Val: MovedExtInst->getOperand(i: `0`))) {
6935	LI = cast<LoadInst>(Val: MovedExtInst->getOperand(i: `0`));
6936	Inst = MovedExtInst;
6937	break;
6938	}
6939	}
6940	if (!LI)
6941	return false;
6942
6943	// If they're already in the same block, there's nothing to do.
6944	// Make the cheap checks first if we did not promote.
6945	// If we promoted, we need to check if it is indeed profitable.
6946	if (!HasPromoted && LI->getParent() == Inst->getParent())
6947	return false;
6948
6949	return TLI->isExtLoad(Load: LI, Ext: Inst, DL: *DL);
6950	}
6951
6952	/// Move a zext or sext fed by a load into the same basic block as the load,
6953	/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6954	/// extend into the load.
6955	///
6956	/// E.g.,
6957	/// \code
6958	/// %ld = load i32 %addr*
6959	/// %add = add nuw i32 %ld, 4
6960	/// %zext = zext i32 %add to i64
6961	// \endcode
6962	/// =>
6963	/// \code
6964	/// %ld = load i32 %addr*
6965	/// %zext = zext i32 %ld to i64
6966	/// %add = add nuw i64 %zext, 4
6967	/// \encode
6968	/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6969	/// allow us to match zext(load i32) to i64.*
6970	///
6971	/// Also, try to promote the computations used to obtain a sign extended
6972	/// value used into memory accesses.
6973	/// E.g.,
6974	/// \code
6975	/// a = add nsw i32 b, 3
6976	/// d = sext i32 a to i64
6977	/// e = getelementptr ..., i64 d
6978	/// \endcode
6979	/// =>
6980	/// \code
6981	/// f = sext i32 b to i64
6982	/// a = add nsw i64 f, 3
6983	/// e = getelementptr ..., i64 a
6984	/// \endcode
6985	///
6986	/// \p Inst[in/out] the extension may be modified during the process if some
6987	/// promotions apply.
6988	bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6989	bool AllowPromotionWithoutCommonHeader = false;
6990	/// See if it is an interesting sext operations for the address type
6991	/// promotion before trying to promote it, e.g., the ones with the right
6992	/// type and used in memory accesses.
6993	bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6994	I: *Inst, AllowPromotionWithoutCommonHeader);
6995	TypePromotionTransaction TPT(RemovedInsts);
6996	TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6997	TPT.getRestorationPoint();
6998	SmallVector<Instruction *, `1`> Exts;
6999	SmallVector<Instruction *, `2`> SpeculativelyMovedExts;
7000	Exts.push_back(Elt: Inst);
7001
7002	bool HasPromoted = tryToPromoteExts(TPT, Exts, ProfitablyMovedExts&: SpeculativelyMovedExts);
7003
7004	// Look for a load being extended.
7005	LoadInst LI = nullptr*;
7006	Instruction *ExtFedByLoad;
7007
7008	// Try to promote a chain of computation if it allows to form an extended
7009	// load.
7010	if (canFormExtLd(MovedExts: SpeculativelyMovedExts, LI, Inst&: ExtFedByLoad, HasPromoted)) {
7011	assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7012	TPT.commit();
7013	// Move the extend into the same block as the load.
7014	ExtFedByLoad->moveAfter(MovePos: LI);
7015	++NumExtsMoved;
7016	Inst = ExtFedByLoad;
7017	return true;
7018	}
7019
7020	// Continue promoting SExts if known as considerable depending on targets.
7021	if (ATPConsiderable &&
7022	performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7023	HasPromoted, TPT, SpeculativelyMovedExts))
7024	return true;
7025
7026	TPT.rollback(Point: LastKnownGood);
7027	return false;
7028	}
7029
7030	// Perform address type promotion if doing so is profitable.
7031	// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7032	// instructions that sign extended the same initial value. However, if
7033	// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7034	// extension is just profitable.
7035	bool CodeGenPrepare::performAddressTypePromotion(
7036	Instruction &Inst, bool* AllowPromotionWithoutCommonHeader,
7037	bool HasPromoted, TypePromotionTransaction &TPT,
7038	SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7039	bool Promoted = false;
7040	SmallPtrSet<Instruction *, `1`> UnhandledExts;
7041	bool AllSeenFirst = true;
7042	for (auto *I : SpeculativelyMovedExts) {
7043	Value *HeadOfChain = I->getOperand(i: `0`);
7044	DenseMap<Value , Instruction >::iterator AlreadySeen =
7045	SeenChainsForSExt.find(Val: HeadOfChain);
7046	// If there is an unhandled SExt which has the same header, try to promote
7047	// it as well.
7048	if (AlreadySeen != SeenChainsForSExt.end()) {
7049	if (AlreadySeen ->second != nullptr)
7050	UnhandledExts.insert(Ptr: AlreadySeen ->second);
7051	AllSeenFirst = false;
7052	}
7053	}
7054
7055	if (!AllSeenFirst \|\| (AllowPromotionWithoutCommonHeader &&
7056	SpeculativelyMovedExts.size() == `1`)) {
7057	TPT.commit();
7058	if (HasPromoted)
7059	Promoted = true;
7060	for (auto *I : SpeculativelyMovedExts) {
7061	Value *HeadOfChain = I->getOperand(i: `0`);
7062	SeenChainsForSExt [HeadOfChain] = nullptr;
7063	ValToSExtendedUses [HeadOfChain].push_back(Elt: I);
7064	}
7065	// Update Inst as promotion happen.
7066	Inst = SpeculativelyMovedExts.pop_back_val();
7067	} else {
7068	// This is the first chain visited from the header, keep the current chain
7069	// as unhandled. Defer to promote this until we encounter another SExt
7070	// chain derived from the same header.
7071	for (auto *I : SpeculativelyMovedExts) {
7072	Value *HeadOfChain = I->getOperand(i: `0`);
7073	SeenChainsForSExt [HeadOfChain] = Inst;
7074	}
7075	return false;
7076	}
7077
7078	if (!AllSeenFirst && !UnhandledExts.empty())
7079	for (auto *VisitedSExt : UnhandledExts) {
7080	if (RemovedInsts.count(Ptr: VisitedSExt))
7081	continue;
7082	TypePromotionTransaction TPT(RemovedInsts);
7083	SmallVector<Instruction *, `1`> Exts;
7084	SmallVector<Instruction *, `2`> Chains;
7085	Exts.push_back(Elt: VisitedSExt);
7086	bool HasPromoted = tryToPromoteExts(TPT, Exts, ProfitablyMovedExts&: Chains);
7087	TPT.commit();
7088	if (HasPromoted)
7089	Promoted = true;
7090	for (auto *I : Chains) {
7091	Value *HeadOfChain = I->getOperand(i: `0`);
7092	// Mark this as handled.
7093	SeenChainsForSExt [HeadOfChain] = nullptr;
7094	ValToSExtendedUses [HeadOfChain].push_back(Elt: I);
7095	}
7096	}
7097	return Promoted;
7098	}
7099
7100	bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7101	BasicBlock *DefBB = I->getParent();
7102
7103	// If the result of a {s\|z}ext and its source are both live out, rewrite all
7104	// other uses of the source with result of extension.
7105	Value *Src = I->getOperand(i: `0`);
7106	if (Src->hasOneUse())
7107	return false;
7108
7109	// Only do this xform if truncating is free.
7110	if (!TLI->isTruncateFree(FromTy: I->getType(), ToTy: Src->getType()))
7111	return false;
7112
7113	// Only safe to perform the optimization if the source is also defined in
7114	// this block.
7115	if (!isa<Instruction>(Val: Src) \|\| DefBB != cast<Instruction>(Val: Src)->getParent())
7116	return false;
7117
7118	bool DefIsLiveOut = false;
7119	for (User *U : I->users()) {
7120	Instruction *UI = cast<Instruction>(Val: U);
7121
7122	// Figure out which BB this ext is used in.
7123	BasicBlock *UserBB = UI->getParent();
7124	if (UserBB == DefBB)
7125	continue;
7126	DefIsLiveOut = true;
7127	break;
7128	}
7129	if (!DefIsLiveOut)
7130	return false;
7131
7132	// Make sure none of the uses are PHI nodes.
7133	for (User *U : Src->users()) {
7134	Instruction *UI = cast<Instruction>(Val: U);
7135	BasicBlock *UserBB = UI->getParent();
7136	if (UserBB == DefBB)
7137	continue;
7138	// Be conservative. We don't want this xform to end up introducing
7139	// reloads just before load / store instructions.
7140	if (isa<PHINode>(Val: UI) \|\| isa<LoadInst>(Val: UI) \|\| isa<StoreInst>(Val: UI))
7141	return false;
7142	}
7143
7144	// InsertedTruncs - Only insert one trunc in each block once.
7145	DenseMap<BasicBlock , Instruction > InsertedTruncs;
7146
7147	bool MadeChange = false;
7148	for (Use &U : Src->uses()) {
7149	Instruction *User = cast<Instruction>(Val: U.getUser());
7150
7151	// Figure out which BB this ext is used in.
7152	BasicBlock *UserBB = User->getParent();
7153	if (UserBB == DefBB)
7154	continue;
7155
7156	// Both src and def are live in this block. Rewrite the use.
7157	Instruction *&InsertedTrunc = InsertedTruncs [UserBB];
7158
7159	if (!InsertedTrunc) {
7160	BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7161	assert(InsertPt != UserBB->end());
7162	InsertedTrunc = new TruncInst (I, Src->getType(), "");
7163	InsertedTrunc->insertBefore(BB&: *UserBB, InsertPos: InsertPt);
7164	InsertedInsts.insert(Ptr: InsertedTrunc);
7165	}
7166
7167	// Replace a use of the {s\|z}ext source with a use of the result.
7168	U = InsertedTrunc;
7169	++NumExtUses;
7170	MadeChange = true;
7171	}
7172
7173	return MadeChange;
7174	}
7175
7176	// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7177	// just after the load if the target can fold this into one extload instruction,
7178	// with the hope of eliminating some of the other later "and" instructions using
7179	// the loaded value. "and"s that are made trivially redundant by the insertion
7180	// of the new "and" are removed by this function, while others (e.g. those whose
7181	// path from the load goes through a phi) are left for isel to potentially
7182	// remove.
7183	//
7184	// For example:
7185	//
7186	// b0:
7187	// x = load i32
7188	// ...
7189	// b1:
7190	// y = and x, 0xff
7191	// z = use y
7192	//
7193	// becomes:
7194	//
7195	// b0:
7196	// x = load i32
7197	// x' = and x, 0xff
7198	// ...
7199	// b1:
7200	// z = use x'
7201	//
7202	// whereas:
7203	//
7204	// b0:
7205	// x1 = load i32
7206	// ...
7207	// b1:
7208	// x2 = load i32
7209	// ...
7210	// b2:
7211	// x = phi x1, x2
7212	// y = and x, 0xff
7213	//
7214	// becomes (after a call to optimizeLoadExt for each load):
7215	//
7216	// b0:
7217	// x1 = load i32
7218	// x1' = and x1, 0xff
7219	// ...
7220	// b1:
7221	// x2 = load i32
7222	// x2' = and x2, 0xff
7223	// ...
7224	// b2:
7225	// x = phi x1', x2'
7226	// y = and x, 0xff
7227	bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7228	if (!Load->isSimple() \|\| !Load->getType()->isIntOrPtrTy())
7229	return false;
7230
7231	// Skip loads we've already transformed.
7232	if (Load->hasOneUse() &&
7233	InsertedInsts.count(Ptr: cast<Instruction>(Val: *Load->user_begin())))
7234	return false;
7235
7236	// Look at all uses of Load, looking through phis, to determine how many bits
7237	// of the loaded value are needed.
7238	SmallVector<Instruction *, `8`> WorkList;
7239	SmallPtrSet<Instruction *, `16`> Visited;
7240	SmallVector<Instruction *, `8`> AndsToMaybeRemove;
7241	SmallVector<Instruction *, `8`> DropFlags;
7242	for (auto *U : Load->users())
7243	WorkList.push_back(Elt: cast<Instruction>(Val: U));
7244
7245	EVT LoadResultVT = TLI->getValueType(DL: *DL, Ty: Load->getType());
7246	unsigned BitWidth = LoadResultVT.getSizeInBits();
7247	// If the BitWidth is 0, do not try to optimize the type
7248	if (BitWidth == `0`)
7249	return false;
7250
7251	APInt DemandBits(BitWidth, `0`);
7252	APInt WidestAndBits(BitWidth, `0`);
7253
7254	while (!WorkList.empty()) {
7255	Instruction *I = WorkList.pop_back_val();
7256
7257	// Break use-def graph loops.
7258	if (!Visited.insert(Ptr: I).second)
7259	continue;
7260
7261	// For a PHI node, push all of its users.
7262	if (auto *Phi = dyn_cast<PHINode>(Val: I)) {
7263	for (auto *U : Phi->users())
7264	WorkList.push_back(Elt: cast<Instruction>(Val: U));
7265	continue;
7266	}
7267
7268	switch (I->getOpcode()) {
7269	case Instruction::And: {
7270	auto *AndC = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`));
7271	if (!AndC)
7272	return false;
7273	APInt AndBits = AndC->getValue();
7274	DemandBits \|= AndBits;
7275	// Keep track of the widest and mask we see.
7276	if (AndBits.ugt(RHS: WidestAndBits))
7277	WidestAndBits = AndBits;
7278	if (AndBits == WidestAndBits && I->getOperand(i: `0`) == Load)
7279	AndsToMaybeRemove.push_back(Elt: I);
7280	break;
7281	}
7282
7283	case Instruction::Shl: {
7284	auto *ShlC = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`));
7285	if (!ShlC)
7286	return false;
7287	uint64_t ShiftAmt = ShlC->getLimitedValue(Limit: BitWidth - `1`);
7288	DemandBits.setLowBits(BitWidth - ShiftAmt);
7289	DropFlags.push_back(Elt: I);
7290	break;
7291	}
7292
7293	case Instruction::Trunc: {
7294	EVT TruncVT = TLI->getValueType(DL: *DL, Ty: I->getType());
7295	unsigned TruncBitWidth = TruncVT.getSizeInBits();
7296	DemandBits.setLowBits(TruncBitWidth);
7297	DropFlags.push_back(Elt: I);
7298	break;
7299	}
7300
7301	default:
7302	return false;
7303	}
7304	}
7305
7306	uint32_t ActiveBits = DemandBits.getActiveBits();
7307	// Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7308	// target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7309	// for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7310	// (and (load x) 1) is not matched as a single instruction, rather as a LDR
7311	// followed by an AND.
7312	// TODO: Look into removing this restriction by fixing backends to either
7313	// return false for isLoadExtLegal for i1 or have them select this pattern to
7314	// a single instruction.
7315	//
7316	// Also avoid hoisting if we didn't see any ands with the exact DemandBits
7317	// mask, since these are the only ands that will be removed by isel.
7318	if (ActiveBits <= `1` \|\| !DemandBits.isMask(numBits: ActiveBits) \|\|
7319	WidestAndBits != DemandBits)
7320	return false;
7321
7322	LLVMContext &Ctx = Load->getType()->getContext();
7323	Type *TruncTy = Type::getIntNTy(C&: Ctx, N: ActiveBits);
7324	EVT TruncVT = TLI->getValueType(DL: *DL, Ty: TruncTy);
7325
7326	// Reject cases that won't be matched as extloads.
7327	if (!LoadResultVT.bitsGT(VT: TruncVT) \|\| !TruncVT.isRound() \|\|
7328	!TLI->isLoadExtLegal(ExtType: ISD::ZEXTLOAD, ValVT: LoadResultVT, MemVT: TruncVT))
7329	return false;
7330
7331	IRBuilder<> Builder(Load->getNextNonDebugInstruction());
7332	auto *NewAnd = cast<Instruction>(
7333	Val: Builder.CreateAnd(LHS: Load, RHS: ConstantInt::get(Context&: Ctx, V: DemandBits)));
7334	// Mark this instruction as "inserted by CGP", so that other
7335	// optimizations don't touch it.
7336	InsertedInsts.insert(Ptr: NewAnd);
7337
7338	// Replace all uses of load with new and (except for the use of load in the
7339	// new and itself).
7340	replaceAllUsesWith(Old: Load, New: NewAnd, FreshBBs, IsHuge: IsHugeFunc);
7341	NewAnd->setOperand(i: `0`, Val: Load);
7342
7343	// Remove any and instructions that are now redundant.
7344	for (auto *And : AndsToMaybeRemove)
7345	// Check that the and mask is the same as the one we decided to put on the
7346	// new and.
7347	if (cast<ConstantInt>(Val: And->getOperand(i: `1`))->getValue() == DemandBits) {
7348	replaceAllUsesWith(Old: And, New: NewAnd, FreshBBs, IsHuge: IsHugeFunc);
7349	if (&*CurInstIterator == And)
7350	CurInstIterator = std::next(x: And->getIterator());
7351	And->eraseFromParent();
7352	++NumAndUses;
7353	}
7354
7355	// NSW flags may not longer hold.
7356	for (auto *Inst : DropFlags)
7357	Inst->setHasNoSignedWrap(false);
7358
7359	++NumAndsAdded;
7360	return true;
7361	}
7362
7363	/// Check if V (an operand of a select instruction) is an expensive instruction
7364	/// that is only used once.
7365	static bool sinkSelectOperand(const TargetTransformInfo TTI, Value V) {
7366	auto *I = dyn_cast<Instruction>(Val: V);
7367	// If it's safe to speculatively execute, then it should not have side
7368	// effects; therefore, it's safe to sink and possibly not* execute.*
7369	return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7370	TTI->isExpensiveToSpeculativelyExecute(I);
7371	}
7372
7373	/// Returns true if a SelectInst should be turned into an explicit branch.
7374	static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7375	const TargetLowering *TLI,
7376	SelectInst *SI) {
7377	// If even a predictable select is cheap, then a branch can't be cheaper.
7378	if (!TLI->isPredictableSelectExpensive())
7379	return false;
7380
7381	// FIXME: This should use the same heuristics as IfConversion to determine
7382	// whether a select is better represented as a branch.
7383
7384	// If metadata tells us that the select condition is obviously predictable,
7385	// then we want to replace the select with a branch.
7386	uint64_t TrueWeight, FalseWeight;
7387	if (extractBranchWeights(I: *SI, TrueVal&: TrueWeight, FalseVal&: FalseWeight)) {
7388	uint64_t Max = std::max(a: TrueWeight, b: FalseWeight);
7389	uint64_t Sum = TrueWeight + FalseWeight;
7390	if (Sum != `0`) {
7391	auto Probability = BranchProbability::getBranchProbability(Numerator: Max, Denominator: Sum);
7392	if (Probability > TTI->getPredictableBranchThreshold())
7393	return true;
7394	}
7395	}
7396
7397	CmpInst *Cmp = dyn_cast<CmpInst>(Val: SI->getCondition());
7398
7399	// If a branch is predictable, an out-of-order CPU can avoid blocking on its
7400	// comparison condition. If the compare has more than one use, there's
7401	// probably another cmov or setcc around, so it's not worth emitting a branch.
7402	if (!Cmp \|\| !Cmp->hasOneUse())
7403	return false;
7404
7405	// If either operand of the select is expensive and only needed on one side
7406	// of the select, we should form a branch.
7407	if (sinkSelectOperand(TTI, V: SI->getTrueValue()) \|\|
7408	sinkSelectOperand(TTI, V: SI->getFalseValue()))
7409	return true;
7410
7411	return false;
7412	}
7413
7414	/// If \p isTrue is true, return the true value of \p SI, otherwise return
7415	/// false value of \p SI. If the true/false value of \p SI is defined by any
7416	/// select instructions in \p Selects, look through the defining select
7417	/// instruction until the true/false value is not defined in \p Selects.
7418	static Value *
7419	getTrueOrFalseValue(SelectInst SI, bool* isTrue,
7420	const SmallPtrSet<const Instruction *, `2`> &Selects) {
7421	Value V = nullptr*;
7422
7423	for (SelectInst DefSI = SI; DefSI != nullptr* && Selects.count(Ptr: DefSI);
7424	DefSI = dyn_cast<SelectInst>(Val: V)) {
7425	assert(DefSI->getCondition() == SI->getCondition() &&
7426	"The condition of DefSI does not match with SI");
7427	V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7428	}
7429
7430	assert(V && "Failed to get select true/false value");
7431	return V;
7432	}
7433
7434	bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7435	assert(Shift->isShift() && "Expected a shift");
7436
7437	// If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7438	// general vector shifts, and (3) the shift amount is a select-of-splatted
7439	// values, hoist the shifts before the select:
7440	// shift Op0, (select Cond, TVal, FVal) -->
7441	// select Cond, (shift Op0, TVal), (shift Op0, FVal)
7442	//
7443	// This is inverting a generic IR transform when we know that the cost of a
7444	// general vector shift is more than the cost of 2 shift-by-scalars.
7445	// We can't do this effectively in SDAG because we may not be able to
7446	// determine if the select operands are splats from within a basic block.
7447	Type *Ty = Shift->getType();
7448	if (!Ty->isVectorTy() \|\| !TTI->isVectorShiftByScalarCheap(Ty))
7449	return false;
7450	Value Cond, TVal, *FVal;
7451	if (!match(V: Shift->getOperand(i_nocapture: `1`),
7452	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))))
7453	return false;
7454	if (!isSplatValue(V: TVal) \|\| !isSplatValue(V: FVal))
7455	return false;
7456
7457	IRBuilder<> Builder(Shift);
7458	BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7459	Value *NewTVal = Builder.CreateBinOp(Opc: Opcode, LHS: Shift->getOperand(i_nocapture: `0`), RHS: TVal);
7460	Value *NewFVal = Builder.CreateBinOp(Opc: Opcode, LHS: Shift->getOperand(i_nocapture: `0`), RHS: FVal);
7461	Value *NewSel = Builder.CreateSelect(C: Cond, True: NewTVal, False: NewFVal);
7462	replaceAllUsesWith(Old: Shift, New: NewSel, FreshBBs, IsHuge: IsHugeFunc);
7463	Shift->eraseFromParent();
7464	return true;
7465	}
7466
7467	bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7468	Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7469	assert((Opcode == Intrinsic::fshl \|\| Opcode == Intrinsic::fshr) &&
7470	"Expected a funnel shift");
7471
7472	// If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7473	// than general vector shifts, and (3) the shift amount is select-of-splatted
7474	// values, hoist the funnel shifts before the select:
7475	// fsh Op0, Op1, (select Cond, TVal, FVal) -->
7476	// select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7477	//
7478	// This is inverting a generic IR transform when we know that the cost of a
7479	// general vector shift is more than the cost of 2 shift-by-scalars.
7480	// We can't do this effectively in SDAG because we may not be able to
7481	// determine if the select operands are splats from within a basic block.
7482	Type *Ty = Fsh->getType();
7483	if (!Ty->isVectorTy() \|\| !TTI->isVectorShiftByScalarCheap(Ty))
7484	return false;
7485	Value Cond, TVal, *FVal;
7486	if (!match(V: Fsh->getOperand(i_nocapture: `2`),
7487	P: m_OneUse(SubPattern: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))))
7488	return false;
7489	if (!isSplatValue(V: TVal) \|\| !isSplatValue(V: FVal))
7490	return false;
7491
7492	IRBuilder<> Builder(Fsh);
7493	Value X = Fsh->getOperand(i_nocapture: `0`), Y = Fsh->getOperand(i_nocapture: `1`);
7494	Value *NewTVal = Builder.CreateIntrinsic(ID: Opcode, Types: Ty, Args: {X, Y, TVal});
7495	Value *NewFVal = Builder.CreateIntrinsic(ID: Opcode, Types: Ty, Args: {X, Y, FVal});
7496	Value *NewSel = Builder.CreateSelect(C: Cond, True: NewTVal, False: NewFVal);
7497	replaceAllUsesWith(Old: Fsh, New: NewSel, FreshBBs, IsHuge: IsHugeFunc);
7498	Fsh->eraseFromParent();
7499	return true;
7500	}
7501
7502	/// If we have a SelectInst that will likely profit from branch prediction,
7503	/// turn it into a branch.
7504	bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7505	if (DisableSelectToBranch)
7506	return false;
7507
7508	// If the SelectOptimize pass is enabled, selects have already been optimized.
7509	if (!getCGPassBuilderOption().DisableSelectOptimize)
7510	return false;
7511
7512	// Find all consecutive select instructions that share the same condition.
7513	SmallVector<SelectInst *, `2`> ASI;
7514	ASI.push_back(Elt: SI);
7515	for (BasicBlock::iterator It = ++BasicBlock::iterator (SI);
7516	It != SI->getParent()->end(); ++It) {
7517	SelectInst I = dyn_cast<SelectInst>(Val: &It);
7518	if (I && SI->getCondition() == I->getCondition()) {
7519	ASI.push_back(Elt: I);
7520	} else {
7521	break;
7522	}
7523	}
7524
7525	SelectInst *LastSI = ASI.back();
7526	// Increment the current iterator to skip all the rest of select instructions
7527	// because they will be either "not lowered" or "all lowered" to branch.
7528	CurInstIterator = std::next(x: LastSI->getIterator());
7529	// Examine debug-info attached to the consecutive select instructions. They
7530	// won't be individually optimised by optimizeInst, so we need to perform
7531	// DbgVariableRecord maintenence here instead.
7532	for (SelectInst *SI : ArrayRef(ASI).drop_front())
7533	fixupDbgVariableRecordsOnInst(I&: *SI);
7534
7535	bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(Bitwidth: `1`);
7536
7537	// Can we convert the 'select' to CF ?
7538	if (VectorCond \|\| SI->getMetadata(KindID: LLVMContext::MD_unpredictable))
7539	return false;
7540
7541	TargetLowering::SelectSupportKind SelectKind;
7542	if (SI->getType()->isVectorTy())
7543	SelectKind = TargetLowering::ScalarCondVectorVal;
7544	else
7545	SelectKind = TargetLowering::ScalarValSelect;
7546
7547	if (TLI->isSelectSupported(SelectKind) &&
7548	(!isFormingBranchFromSelectProfitable(TTI, TLI, SI) \|\|
7549	llvm::shouldOptimizeForSize(BB: SI->getParent(), PSI, BFI: BFI.get())))
7550	return false;
7551
7552	// The DominatorTree needs to be rebuilt by any consumers after this
7553	// transformation. We simply reset here rather than setting the ModifiedDT
7554	// flag to avoid restarting the function walk in runOnFunction for each
7555	// select optimized.
7556	DT.reset();
7557
7558	// Transform a sequence like this:
7559	// start:
7560	// %cmp = cmp uge i32 %a, %b
7561	// %sel = select i1 %cmp, i32 %c, i32 %d
7562	//
7563	// Into:
7564	// start:
7565	// %cmp = cmp uge i32 %a, %b
7566	// %cmp.frozen = freeze %cmp
7567	// br i1 %cmp.frozen, label %select.true, label %select.false
7568	// select.true:
7569	// br label %select.end
7570	// select.false:
7571	// br label %select.end
7572	// select.end:
7573	// %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7574	//
7575	// %cmp should be frozen, otherwise it may introduce undefined behavior.
7576	// In addition, we may sink instructions that produce %c or %d from
7577	// the entry block into the destination(s) of the new branch.
7578	// If the true or false blocks do not contain a sunken instruction, that
7579	// block and its branch may be optimized away. In that case, one side of the
7580	// first branch will point directly to select.end, and the corresponding PHI
7581	// predecessor block will be the start block.
7582
7583	// Collect values that go on the true side and the values that go on the false
7584	// side.
7585	SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7586	for (SelectInst *SI : ASI) {
7587	if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7588	TrueInstrs.push_back(Elt: cast<Instruction>(Val: V));
7589	if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7590	FalseInstrs.push_back(Elt: cast<Instruction>(Val: V));
7591	}
7592
7593	// Split the select block, according to how many (if any) values go on each
7594	// side.
7595	BasicBlock *StartBlock = SI->getParent();
7596	BasicBlock::iterator SplitPt = std::next(x: BasicBlock::iterator (LastSI));
7597	// We should split before any debug-info.
7598	SplitPt.setHeadBit(true);
7599
7600	IRBuilder<> IB(SI);
7601	auto *CondFr = IB.CreateFreeze(V: SI->getCondition(), Name: SI->getName() + ".frozen");
7602
7603	BasicBlock TrueBlock = nullptr*;
7604	BasicBlock FalseBlock = nullptr*;
7605	BasicBlock EndBlock = nullptr*;
7606	BranchInst TrueBranch = nullptr*;
7607	BranchInst FalseBranch = nullptr*;
7608	if (TrueInstrs.size() == `0`) {
7609	FalseBranch = cast<BranchInst>(Val: SplitBlockAndInsertIfElse(
7610	Cond: CondFr, SplitBefore: SplitPt, Unreachable: false, BranchWeights: nullptr, DTU: nullptr, LI));
7611	FalseBlock = FalseBranch->getParent();
7612	EndBlock = cast<BasicBlock>(Val: FalseBranch->getOperand(i_nocapture: `0`));
7613	} else if (FalseInstrs.size() == `0`) {
7614	TrueBranch = cast<BranchInst>(Val: SplitBlockAndInsertIfThen(
7615	Cond: CondFr, SplitBefore: SplitPt, Unreachable: false, BranchWeights: nullptr, DTU: nullptr, LI));
7616	TrueBlock = TrueBranch->getParent();
7617	EndBlock = cast<BasicBlock>(Val: TrueBranch->getOperand(i_nocapture: `0`));
7618	} else {
7619	Instruction ThenTerm = nullptr*;
7620	Instruction ElseTerm = nullptr*;
7621	SplitBlockAndInsertIfThenElse(Cond: CondFr, SplitBefore: SplitPt, ThenTerm: &ThenTerm, ElseTerm: &ElseTerm,
7622	BranchWeights: nullptr, DTU: nullptr, LI);
7623	TrueBranch = cast<BranchInst>(Val: ThenTerm);
7624	FalseBranch = cast<BranchInst>(Val: ElseTerm);
7625	TrueBlock = TrueBranch->getParent();
7626	FalseBlock = FalseBranch->getParent();
7627	EndBlock = cast<BasicBlock>(Val: TrueBranch->getOperand(i_nocapture: `0`));
7628	}
7629
7630	EndBlock->setName("select.end");
7631	if (TrueBlock)
7632	TrueBlock->setName("select.true.sink");
7633	if (FalseBlock)
7634	FalseBlock->setName(FalseInstrs.size() == `0` ? "select.false"
7635	: "select.false.sink");
7636
7637	if (IsHugeFunc) {
7638	if (TrueBlock)
7639	FreshBBs.insert(Ptr: TrueBlock);
7640	if (FalseBlock)
7641	FreshBBs.insert(Ptr: FalseBlock);
7642	FreshBBs.insert(Ptr: EndBlock);
7643	}
7644
7645	BFI ->setBlockFreq(BB: EndBlock, Freq: BFI ->getBlockFreq(BB: StartBlock));
7646
7647	static const unsigned MD[] = {
7648	LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7649	LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7650	StartBlock->getTerminator()->copyMetadata(SrcInst: *SI, WL: MD);
7651
7652	// Sink expensive instructions into the conditional blocks to avoid executing
7653	// them speculatively.
7654	for (Instruction *I : TrueInstrs)
7655	I->moveBefore(InsertPos: TrueBranch->getIterator());
7656	for (Instruction *I : FalseInstrs)
7657	I->moveBefore(InsertPos: FalseBranch->getIterator());
7658
7659	// If we did not create a new block for one of the 'true' or 'false' paths
7660	// of the condition, it means that side of the branch goes to the end block
7661	// directly and the path originates from the start block from the point of
7662	// view of the new PHI.
7663	if (TrueBlock == nullptr)
7664	TrueBlock = StartBlock;
7665	else if (FalseBlock == nullptr)
7666	FalseBlock = StartBlock;
7667
7668	SmallPtrSet<const Instruction *, `2`> INS(llvm::from_range, ASI);
7669	// Use reverse iterator because later select may use the value of the
7670	// earlier select, and we need to propagate value through earlier select
7671	// to get the PHI operand.
7672	for (SelectInst *SI : llvm::reverse(C&: ASI)) {
7673	// The select itself is replaced with a PHI Node.
7674	PHINode *PN = PHINode::Create(Ty: SI->getType(), NumReservedValues: `2`, NameStr: "");
7675	PN->insertBefore(InsertPos: EndBlock->begin());
7676	PN->takeName(V: SI);
7677	PN->addIncoming(V: getTrueOrFalseValue(SI, isTrue: true, Selects: INS), BB: TrueBlock);
7678	PN->addIncoming(V: getTrueOrFalseValue(SI, isTrue: false, Selects: INS), BB: FalseBlock);
7679	PN->setDebugLoc(SI->getDebugLoc());
7680
7681	replaceAllUsesWith(Old: SI, New: PN, FreshBBs, IsHuge: IsHugeFunc);
7682	SI->eraseFromParent();
7683	INS.erase(Ptr: SI);
7684	++NumSelectsExpanded;
7685	}
7686
7687	// Instruct OptimizeBlock to skip to the next block.
7688	CurInstIterator = StartBlock->end();
7689	return true;
7690	}
7691
7692	/// Some targets only accept certain types for splat inputs. For example a VDUP
7693	/// in MVE takes a GPR (integer) register, and the instruction that incorporate
7694	/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
7695	bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7696	// Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7697	if (!match(V: SVI, P: m_Shuffle(v1: m_InsertElt(Val: m_Undef(), Elt: m_Value(), Idx: m_ZeroInt()),
7698	v2: m_Undef(), mask: m_ZeroMask ())))
7699	return false;
7700	Type *NewType = TLI->shouldConvertSplatType(SVI);
7701	if (!NewType)
7702	return false;
7703
7704	auto *SVIVecType = cast<FixedVectorType>(Val: SVI->getType());
7705	assert(!NewType->isVectorTy() && "Expected a scalar type!");
7706	assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7707	"Expected a type of the same size!");
7708	auto *NewVecType =
7709	FixedVectorType::get(ElementType: NewType, NumElts: SVIVecType->getNumElements());
7710
7711	// Create a bitcast (shuffle (insert (bitcast(..))))
7712	IRBuilder<> Builder(SVI->getContext());
7713	Builder.SetInsertPoint(SVI);
7714	Value *BC1 = Builder.CreateBitCast(
7715	V: cast<Instruction>(Val: SVI->getOperand(i_nocapture: `0`))->getOperand(i: `1`), DestTy: NewType);
7716	Value *Shuffle = Builder.CreateVectorSplat(NumElts: NewVecType->getNumElements(), V: BC1);
7717	Value *BC2 = Builder.CreateBitCast(V: Shuffle, DestTy: SVIVecType);
7718
7719	replaceAllUsesWith(Old: SVI, New: BC2, FreshBBs, IsHuge: IsHugeFunc);
7720	RecursivelyDeleteTriviallyDeadInstructions(
7721	V: SVI, TLI: TLInfo, MSSAU: nullptr,
7722	AboutToDeleteCallback: [&](Value *V) { removeAllAssertingVHReferences(V); });
7723
7724	// Also hoist the bitcast up to its operand if it they are not in the same
7725	// block.
7726	if (auto *BCI = dyn_cast<Instruction>(Val: BC1))
7727	if (auto *Op = dyn_cast<Instruction>(Val: BCI->getOperand(i: `0`)))
7728	if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Val: Op) &&
7729	!Op->isTerminator() && !Op->isEHPad())
7730	BCI->moveAfter(MovePos: Op);
7731
7732	return true;
7733	}
7734
7735	bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7736	// If the operands of I can be folded into a target instruction together with
7737	// I, duplicate and sink them.
7738	SmallVector<Use *, `4`> OpsToSink;
7739	if (!TTI->isProfitableToSinkOperands(I, Ops&: OpsToSink))
7740	return false;
7741
7742	// OpsToSink can contain multiple uses in a use chain (e.g.
7743	// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7744	// uses must come first, so we process the ops in reverse order so as to not
7745	// create invalid IR.
7746	BasicBlock *TargetBB = I->getParent();
7747	bool Changed = false;
7748	SmallVector<Use *, `4`> ToReplace;
7749	Instruction *InsertPoint = I;
7750	DenseMap<const Instruction , unsigned* long> InstOrdering;
7751	unsigned long InstNumber = `0`;
7752	for (const auto &I : *TargetBB)
7753	InstOrdering [&I] = InstNumber++;
7754
7755	for (Use *U : reverse(C&: OpsToSink)) {
7756	auto *UI = cast<Instruction>(Val: U->get());
7757	if (isa<PHINode>(Val: UI))
7758	continue;
7759	if (UI->getParent() == TargetBB) {
7760	if (InstOrdering [UI] < InstOrdering [InsertPoint])
7761	InsertPoint = UI;
7762	continue;
7763	}
7764	ToReplace.push_back(Elt: U);
7765	}
7766
7767	SetVector<Instruction *> MaybeDead;
7768	DenseMap<Instruction , Instruction > NewInstructions;
7769	for (Use *U : ToReplace) {
7770	auto *UI = cast<Instruction>(Val: U->get());
7771	Instruction *NI = UI->clone();
7772
7773	if (IsHugeFunc) {
7774	// Now we clone an instruction, its operands' defs may sink to this BB
7775	// now. So we put the operands defs' BBs into FreshBBs to do optimization.
7776	for (Value *Op : NI->operands())
7777	if (auto *OpDef = dyn_cast<Instruction>(Val: Op))
7778	FreshBBs.insert(Ptr: OpDef->getParent());
7779	}
7780
7781	NewInstructions [UI] = NI;
7782	MaybeDead.insert(X: UI);
7783	LLVM_DEBUG(dbgs() << "Sinking " << UI << " to user " << I << "\n");
7784	NI->insertBefore(InsertPos: InsertPoint->getIterator());
7785	InsertPoint = NI;
7786	InsertedInsts.insert(Ptr: NI);
7787
7788	// Update the use for the new instruction, making sure that we update the
7789	// sunk instruction uses, if it is part of a chain that has already been
7790	// sunk.
7791	Instruction *OldI = cast<Instruction>(Val: U->getUser());
7792	if (auto It = NewInstructions.find(Val: OldI); It != NewInstructions.end())
7793	It ->second->setOperand(i: U->getOperandNo(), Val: NI);
7794	else
7795	U->set(NI);
7796	Changed = true;
7797	}
7798
7799	// Remove instructions that are dead after sinking.
7800	for (auto *I : MaybeDead) {
7801	if (!I->hasNUsesOrMore(N: `1`)) {
7802	LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7803	I->eraseFromParent();
7804	}
7805	}
7806
7807	return Changed;
7808	}
7809
7810	bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7811	Value *Cond = SI->getCondition();
7812	Type *OldType = Cond->getType();
7813	LLVMContext &Context = Cond->getContext();
7814	EVT OldVT = TLI->getValueType(DL: *DL, Ty: OldType);
7815	MVT RegType = TLI->getPreferredSwitchConditionType(Context, ConditionVT: OldVT);
7816	unsigned RegWidth = RegType.getSizeInBits();
7817
7818	if (RegWidth <= cast<IntegerType>(Val: OldType)->getBitWidth())
7819	return false;
7820
7821	// If the register width is greater than the type width, expand the condition
7822	// of the switch instruction and each case constant to the width of the
7823	// register. By widening the type of the switch condition, subsequent
7824	// comparisons (for case comparisons) will not need to be extended to the
7825	// preferred register width, so we will potentially eliminate N-1 extends,
7826	// where N is the number of cases in the switch.
7827	auto *NewType = Type::getIntNTy(C&: Context, N: RegWidth);
7828
7829	// Extend the switch condition and case constants using the target preferred
7830	// extend unless the switch condition is a function argument with an extend
7831	// attribute. In that case, we can avoid an unnecessary mask/extension by
7832	// matching the argument extension instead.
7833	Instruction::CastOps ExtType = Instruction::ZExt;
7834	// Some targets prefer SExt over ZExt.
7835	if (TLI->isSExtCheaperThanZExt(FromTy: OldVT, ToTy: RegType))
7836	ExtType = Instruction::SExt;
7837
7838	if (auto *Arg = dyn_cast<Argument>(Val: Cond)) {
7839	if (Arg->hasSExtAttr())
7840	ExtType = Instruction::SExt;
7841	if (Arg->hasZExtAttr())
7842	ExtType = Instruction::ZExt;
7843	}
7844
7845	auto *ExtInst = CastInst::Create(ExtType, S: Cond, Ty: NewType);
7846	ExtInst->insertBefore(InsertPos: SI->getIterator());
7847	ExtInst->setDebugLoc(SI->getDebugLoc());
7848	SI->setCondition(ExtInst);
7849	for (auto Case : SI->cases()) {
7850	const APInt &NarrowConst = Case.getCaseValue()->getValue();
7851	APInt WideConst = (ExtType == Instruction::ZExt)
7852	? NarrowConst.zext(width: RegWidth)
7853	: NarrowConst.sext(width: RegWidth);
7854	Case.setValue(ConstantInt::get(Context, V: WideConst));
7855	}
7856
7857	return true;
7858	}
7859
7860	bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7861	// The SCCP optimization tends to produce code like this:
7862	// switch(x) { case 42: phi(42, ...) }
7863	// Materializing the constant for the phi-argument needs instructions; So we
7864	// change the code to:
7865	// switch(x) { case 42: phi(x, ...) }
7866
7867	Value *Condition = SI->getCondition();
7868	// Avoid endless loop in degenerate case.
7869	if (isa<ConstantInt>(Val: *Condition))
7870	return false;
7871
7872	bool Changed = false;
7873	BasicBlock *SwitchBB = SI->getParent();
7874	Type *ConditionType = Condition->getType();
7875
7876	for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7877	ConstantInt *CaseValue = Case.getCaseValue();
7878	BasicBlock *CaseBB = Case.getCaseSuccessor();
7879	// Set to true if we previously checked that `CaseBB` is only reached by
7880	// a single case from this switch.
7881	bool CheckedForSinglePred = false;
7882	for (PHINode &PHI : CaseBB->phis()) {
7883	Type *PHIType = PHI.getType();
7884	// If ZExt is free then we can also catch patterns like this:
7885	// switch((i32)x) { case 42: phi((i64)42, ...); }
7886	// and replace `(i64)42` with `zext i32 %x to i64`.
7887	bool TryZExt =
7888	PHIType->isIntegerTy() &&
7889	PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7890	TLI->isZExtFree(FromTy: ConditionType, ToTy: PHIType);
7891	if (PHIType == ConditionType \|\| TryZExt) {
7892	// Set to true to skip this case because of multiple preds.
7893	bool SkipCase = false;
7894	Value Replacement = nullptr*;
7895	for (unsigned I = `0`, E = PHI.getNumIncomingValues(); I != E; I++) {
7896	Value *PHIValue = PHI.getIncomingValue(i: I);
7897	if (PHIValue != CaseValue) {
7898	if (!TryZExt)
7899	continue;
7900	ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(Val: PHIValue);
7901	if (!PHIValueInt \|\|
7902	PHIValueInt->getValue() !=
7903	CaseValue->getValue().zext(width: PHIType->getIntegerBitWidth()))
7904	continue;
7905	}
7906	if (PHI.getIncomingBlock(i: I) != SwitchBB)
7907	continue;
7908	// We cannot optimize if there are multiple case labels jumping to
7909	// this block. This check may get expensive when there are many
7910	// case labels so we test for it last.
7911	if (!CheckedForSinglePred) {
7912	CheckedForSinglePred = true;
7913	if (SI->findCaseDest(BB: CaseBB) == nullptr) {
7914	SkipCase = true;
7915	break;
7916	}
7917	}
7918
7919	if (Replacement == nullptr) {
7920	if (PHIValue == CaseValue) {
7921	Replacement = Condition;
7922	} else {
7923	IRBuilder<> Builder(SI);
7924	Replacement = Builder.CreateZExt(V: Condition, DestTy: PHIType);
7925	}
7926	}
7927	PHI.setIncomingValue(i: I, V: Replacement);
7928	Changed = true;
7929	}
7930	if (SkipCase)
7931	break;
7932	}
7933	}
7934	}
7935	return Changed;
7936	}
7937
7938	bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7939	bool Changed = optimizeSwitchType(SI);
7940	Changed \|= optimizeSwitchPhiConstants(SI);
7941	return Changed;
7942	}
7943
7944	namespace {
7945
7946	/// Helper class to promote a scalar operation to a vector one.
7947	/// This class is used to move downward extractelement transition.
7948	/// E.g.,
7949	/// a = vector_op <2 x i32>
7950	/// b = extractelement <2 x i32> a, i32 0
7951	/// c = scalar_op b
7952	/// store c
7953	///
7954	/// =>
7955	/// a = vector_op <2 x i32>
7956	/// c = vector_op a (equivalent to scalar_op on the related lane)
/// d = extractelement <2 x i32> c, i32 0
/// store d
/// Assuming both extractelement and store can be combined, we get rid of the
/// transition.
7961	class VectorPromoteHelper {
7962	/// DataLayout associated with the current module.
7963	const DataLayout &DL;
7964
7965	/// Used to perform some checks on the legality of vector operations.
7966	const TargetLowering &TLI;
7967
  /// Used to estimate the cost of the promoted chain.
7969	const TargetTransformInfo &TTI;
7970
7971	/// The transition being moved downwards.
7972	Instruction *Transition;
7973
7974	/// The sequence of instructions to be promoted.
7975	SmallVector<Instruction *, `4`> InstsToBePromoted;
7976
7977	/// Cost of combining a store and an extract.
7978	unsigned StoreExtractCombineCost;
7979
7980	/// Instruction that will be combined with the transition.
7981	Instruction CombineInst = nullptr*;
7982
7983	/// The instruction that represents the current end of the transition.
7984	/// Since we are faking the promotion until we reach the end of the chain
7985	/// of computation, we need a way to get the current end of the transition.
7986	Instruction getEndOfTransition() const* {
7987	if (InstsToBePromoted.empty())
7988	return Transition;
7989	return InstsToBePromoted.back();
7990	}
7991
7992	/// Return the index of the original value in the transition.
7993	/// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7994	/// c, is at index 0.
7995	unsigned getTransitionOriginalValueIdx() const {
7996	assert(isa<ExtractElementInst>(Transition) &&
7997	"Other kind of transitions are not supported yet");
7998	return `0`;
7999	}
8000
8001	/// Return the index of the index in the transition.
8002	/// E.g., for "extractelement <2 x i32> c, i32 0" the index
8003	/// is at index 1.
8004	unsigned getTransitionIdx() const {
8005	assert(isa<ExtractElementInst>(Transition) &&
8006	"Other kind of transitions are not supported yet");
8007	return `1`;
8008	}
8009
8010	/// Get the type of the transition.
8011	/// This is the type of the original value.
8012	/// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8013	/// transition is <2 x i32>.
8014	Type getTransitionType() const* {
8015	return Transition->getOperand(i: getTransitionOriginalValueIdx())->getType();
8016	}
8017
8018	/// Promote \p ToBePromoted by moving \p Def downward through.
8019	/// I.e., we have the following sequence:
8020	/// Def = Transition <ty1> a to <ty2>
8021	/// b = ToBePromoted <ty2> Def, ...
8022	/// =>
8023	/// b = ToBePromoted <ty1> a, ...
8024	/// Def = Transition <ty1> ToBePromoted to <ty2>
8025	void promoteImpl(Instruction *ToBePromoted);
8026
8027	/// Check whether or not it is profitable to promote all the
8028	/// instructions enqueued to be promoted.
8029	bool isProfitableToPromote() {
8030	Value *ValIdx = Transition->getOperand(i: getTransitionOriginalValueIdx());
8031	unsigned Index = isa<ConstantInt>(Val: ValIdx)
8032	? cast<ConstantInt>(Val: ValIdx)->getZExtValue()
8033	: -`1`;
8034	Type *PromotedType = getTransitionType();
8035
8036	StoreInst *ST = cast<StoreInst>(Val: CombineInst);
8037	unsigned AS = ST->getPointerAddressSpace();
8038	// Check if this store is supported.
8039	if (!TLI.allowsMisalignedMemoryAccesses(
8040	TLI.getValueType(DL, Ty: ST->getValueOperand()->getType()), AddrSpace: AS,
8041	Alignment: ST->getAlign())) {
8042	// If this is not supported, there is no way we can combine
8043	// the extract with the store.
8044	return false;
8045	}
8046
8047	// The scalar chain of computation has to pay for the transition
8048	// scalar to vector.
8049	// The vector chain has to account for the combining cost.
8050	enum TargetTransformInfo::TargetCostKind CostKind =
8051	TargetTransformInfo::TCK_RecipThroughput;
8052	InstructionCost ScalarCost =
8053	TTI.getVectorInstrCost(I: *Transition, Val: PromotedType, CostKind, Index);
8054	InstructionCost VectorCost = StoreExtractCombineCost;
8055	for (const auto &Inst : InstsToBePromoted) {
8056	// Compute the cost.
8057	// By construction, all instructions being promoted are arithmetic ones.
8058	// Moreover, one argument is a constant that can be viewed as a splat
8059	// constant.
8060	Value *Arg0 = Inst->getOperand(i: `0`);
8061	bool IsArg0Constant = isa<UndefValue>(Val: Arg0) \|\| isa<ConstantInt>(Val: Arg0) \|\|
8062	isa<ConstantFP>(Val: Arg0);
8063	TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8064	if (IsArg0Constant)
8065	Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8066	else
8067	Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8068
8069	ScalarCost += TTI.getArithmeticInstrCost(
8070	Opcode: Inst->getOpcode(), Ty: Inst->getType(), CostKind, Opd1Info: Arg0Info, Opd2Info: Arg1Info);
8071	VectorCost += TTI.getArithmeticInstrCost(Opcode: Inst->getOpcode(), Ty: PromotedType,
8072	CostKind, Opd1Info: Arg0Info, Opd2Info: Arg1Info);
8073	}
8074	LLVM_DEBUG(
8075	dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8076	<< ScalarCost << "\nVector: " << VectorCost << `'\n'`);
8077	return ScalarCost > VectorCost;
8078	}
8079
8080	/// Generate a constant vector with \p Val with the same
8081	/// number of elements as the transition.
8082	/// \p UseSplat defines whether or not \p Val should be replicated
8083	/// across the whole vector.
8084	/// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8085	/// otherwise we generate a vector with as many poison as possible:
8086	/// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8087	/// used at the index of the extract.
8088	Value getConstantVector(Constant Val, bool UseSplat) const {
8089	unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8090	if (!UseSplat) {
8091	// If we cannot determine where the constant must be, we have to
8092	// use a splat constant.
8093	Value *ValExtractIdx = Transition->getOperand(i: getTransitionIdx());
8094	if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Val: ValExtractIdx))
8095	ExtractIdx = CstVal->getSExtValue();
8096	else
8097	UseSplat = true;
8098	}
8099
8100	ElementCount EC = cast<VectorType>(Val: getTransitionType())->getElementCount();
8101	if (UseSplat)
8102	return ConstantVector::getSplat(EC, Elt: Val);
8103
8104	if (!EC.isScalable()) {
8105	SmallVector<Constant *, `4`> ConstVec;
8106	PoisonValue *PoisonVal = PoisonValue::get(T: Val->getType());
8107	for (unsigned Idx = `0`; Idx != EC.getKnownMinValue(); ++Idx) {
8108	if (Idx == ExtractIdx)
8109	ConstVec.push_back(Elt: Val);
8110	else
8111	ConstVec.push_back(Elt: PoisonVal);
8112	}
8113	return ConstantVector::get(V: ConstVec);
8114	} else
8115	llvm_unreachable(
8116	"Generate scalable vector for non-splat is unimplemented");
8117	}
8118
8119	/// Check if promoting to a vector type an operand at \p OperandIdx
8120	/// in \p Use can trigger undefined behavior.
8121	static bool canCauseUndefinedBehavior(const Instruction *Use,
8122	unsigned OperandIdx) {
8123	// This is not safe to introduce undef when the operand is on
8124	// the right hand side of a division-like instruction.
8125	if (OperandIdx != `1`)
8126	return false;
8127	switch (Use->getOpcode()) {
8128	default:
8129	return false;
8130	case Instruction::SDiv:
8131	case Instruction::UDiv:
8132	case Instruction::SRem:
8133	case Instruction::URem:
8134	return true;
8135	case Instruction::FDiv:
8136	case Instruction::FRem:
8137	return !Use->hasNoNaNs();
8138	}
8139	llvm_unreachable(nullptr);
8140	}
8141
8142	public:
8143	VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8144	const TargetTransformInfo &TTI, Instruction *Transition,
8145	unsigned CombineCost)
8146	: DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8147	StoreExtractCombineCost(CombineCost) {
8148	assert(Transition && "Do not know how to promote null");
8149	}
8150
8151	/// Check if we can promote \p ToBePromoted to \p Type.
8152	bool canPromote(const Instruction ToBePromoted) const* {
8153	// We could support CastInst too.
8154	return isa<BinaryOperator>(Val: ToBePromoted);
8155	}
8156
8157	/// Check if it is profitable to promote \p ToBePromoted
8158	/// by moving downward the transition through.
8159	bool shouldPromote(const Instruction ToBePromoted) const* {
8160	// Promote only if all the operands can be statically expanded.
8161	// Indeed, we do not want to introduce any new kind of transitions.
8162	for (const Use &U : ToBePromoted->operands()) {
8163	const Value *Val = U.get();
8164	if (Val == getEndOfTransition()) {
8165	// If the use is a division and the transition is on the rhs,
8166	// we cannot promote the operation, otherwise we may create a
8167	// division by zero.
8168	if (canCauseUndefinedBehavior(Use: ToBePromoted, OperandIdx: U.getOperandNo()))
8169	return false;
8170	continue;
8171	}
8172	if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8173	!isa<ConstantFP>(Val))
8174	return false;
8175	}
8176	// Check that the resulting operation is legal.
8177	int ISDOpcode = TLI.InstructionOpcodeToISD(Opcode: ToBePromoted->getOpcode());
8178	if (!ISDOpcode)
8179	return false;
8180	return StressStoreExtract \|\|
8181	TLI.isOperationLegalOrCustom(
8182	Op: ISDOpcode, VT: TLI.getValueType(DL, Ty: getTransitionType(), AllowUnknown: true));
8183	}
8184
8185	/// Check whether or not \p Use can be combined
8186	/// with the transition.
8187	/// I.e., is it possible to do Use(Transition) => AnotherUse?
8188	bool canCombine(const Instruction Use) { return* isa<StoreInst>(Val: Use); }
8189
8190	/// Record \p ToBePromoted as part of the chain to be promoted.
8191	void enqueueForPromotion(Instruction *ToBePromoted) {
8192	InstsToBePromoted.push_back(Elt: ToBePromoted);
8193	}
8194
8195	/// Set the instruction that will be combined with the transition.
8196	void recordCombineInstruction(Instruction *ToBeCombined) {
8197	assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8198	CombineInst = ToBeCombined;
8199	}
8200
8201	/// Promote all the instructions enqueued for promotion if it is
8202	/// is profitable.
8203	/// \return True if the promotion happened, false otherwise.
8204	bool promote() {
8205	// Check if there is something to promote.
8206	// Right now, if we do not have anything to combine with,
8207	// we assume the promotion is not profitable.
8208	if (InstsToBePromoted.empty() \|\| !CombineInst)
8209	return false;
8210
8211	// Check cost.
8212	if (!StressStoreExtract && !isProfitableToPromote())
8213	return false;
8214
8215	// Promote.
8216	for (auto &ToBePromoted : InstsToBePromoted)
8217	promoteImpl(ToBePromoted);
8218	InstsToBePromoted.clear();
8219	return true;
8220	}
8221	};
8222
8223	} // end anonymous namespace
8224
8225	void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8226	// At this point, we know that all the operands of ToBePromoted but Def
8227	// can be statically promoted.
8228	// For Def, we need to use its parameter in ToBePromoted:
8229	// b = ToBePromoted ty1 a
8230	// Def = Transition ty1 b to ty2
8231	// Move the transition down.
8232	// 1. Replace all uses of the promoted operation by the transition.
8233	// = ... b => = ... Def.
8234	assert(ToBePromoted->getType() == Transition->getType() &&
8235	"The type of the result of the transition does not match "
8236	"the final type");
8237	ToBePromoted->replaceAllUsesWith(V: Transition);
8238	// 2. Update the type of the uses.
8239	// b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8240	Type *TransitionTy = getTransitionType();
8241	ToBePromoted->mutateType(Ty: TransitionTy);
8242	// 3. Update all the operands of the promoted operation with promoted
8243	// operands.
8244	// b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8245	for (Use &U : ToBePromoted->operands()) {
8246	Value *Val = U.get();
8247	Value NewVal = nullptr*;
8248	if (Val == Transition)
8249	NewVal = Transition->getOperand(i: getTransitionOriginalValueIdx());
8250	else if (isa<UndefValue>(Val) \|\| isa<ConstantInt>(Val) \|\|
8251	isa<ConstantFP>(Val)) {
8252	// Use a splat constant if it is not safe to use undef.
8253	NewVal = getConstantVector(
8254	Val: cast<Constant>(Val),
8255	UseSplat: isa<UndefValue>(Val) \|\|
8256	canCauseUndefinedBehavior(Use: ToBePromoted, OperandIdx: U.getOperandNo()));
8257	} else
8258	llvm_unreachable("Did you modified shouldPromote and forgot to update "
8259	"this?");
8260	ToBePromoted->setOperand(i: U.getOperandNo(), Val: NewVal);
8261	}
8262	Transition->moveAfter(MovePos: ToBePromoted);
8263	Transition->setOperand(i: getTransitionOriginalValueIdx(), Val: ToBePromoted);
8264	}
8265
8266	/// Some targets can do store(extractelement) with one instruction.
8267	/// Try to push the extractelement towards the stores when the target
8268	/// has this feature and this is profitable.
8269	bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8270	unsigned CombineCost = std::numeric_limits<unsigned>::max();
8271	if (DisableStoreExtract \|\|
8272	(!StressStoreExtract &&
8273	!TLI->canCombineStoreAndExtract(VectorTy: Inst->getOperand(i: `0`)->getType(),
8274	Idx: Inst->getOperand(i: `1`), Cost&: CombineCost)))
8275	return false;
8276
8277	// At this point we know that Inst is a vector to scalar transition.
8278	// Try to move it down the def-use chain, until:
8279	// - We can combine the transition with its single use
8280	// => we got rid of the transition.
8281	// - We escape the current basic block
8282	// => we would need to check that we are moving it at a cheaper place and
8283	// we do not do that for now.
8284	BasicBlock *Parent = Inst->getParent();
8285	LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << `'\n'`);
8286	VectorPromoteHelper VPH(DL, TLI, *TTI, Inst, CombineCost);
8287	// If the transition has more than one use, assume this is not going to be
8288	// beneficial.
8289	while (Inst->hasOneUse()) {
8290	Instruction ToBePromoted = cast<Instruction>(Val: Inst->user_begin());
8291	LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << `'\n'`);
8292
8293	if (ToBePromoted->getParent() != Parent) {
8294	LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8295	<< ToBePromoted->getParent()->getName()
8296	<< ") than the transition (" << Parent->getName()
8297	<< ").\n");
8298	return false;
8299	}
8300
8301	if (VPH.canCombine(Use: ToBePromoted)) {
8302	LLVM_DEBUG(dbgs() << "Assume " << *Inst << `'\n'`
8303	<< "will be combined with: " << *ToBePromoted << `'\n'`);
8304	VPH.recordCombineInstruction(ToBeCombined: ToBePromoted);
8305	bool Changed = VPH.promote();
8306	NumStoreExtractExposed += Changed;
8307	return Changed;
8308	}
8309
8310	LLVM_DEBUG(dbgs() << "Try promoting.\n");
8311	if (!VPH.canPromote(ToBePromoted) \|\| !VPH.shouldPromote(ToBePromoted))
8312	return false;
8313
8314	LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8315
8316	VPH.enqueueForPromotion(ToBePromoted);
8317	Inst = ToBePromoted;
8318	}
8319	return false;
8320	}
8321
8322	/// For the instruction sequence of store below, F and I values
8323	/// are bundled together as an i64 value before being stored into memory.
8324	/// Sometimes it is more efficient to generate separate stores for F and I,
8325	/// which can remove the bitwise instructions or sink them to colder places.
8326	///
8327	/// (store (or (zext (bitcast F to i32) to i64),
8328	/// (shl (zext I to i64), 32)), addr) -->
8329	/// (store F, addr) and (store I, addr+4)
8330	///
8331	/// Similarly, splitting for other merged store can also be beneficial, like:
8332	/// For pair of {i32, i32}, i64 store --> two i32 stores.
8333	/// For pair of {i32, i16}, i64 store --> two i32 stores.
8334	/// For pair of {i16, i16}, i32 store --> two i16 stores.
8335	/// For pair of {i16, i8}, i32 store --> two i16 stores.
8336	/// For pair of {i8, i8}, i16 store --> two i8 stores.
8337	///
8338	/// We allow each target to determine specifically which kind of splitting is
8339	/// supported.
8340	///
8341	/// The store patterns are commonly seen from the simple code snippet below
8342	/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
8343	/// void goo(const std::pair<int, float> &);
8344	/// hoo() {
8345	/// ...
8346	/// goo(std::make_pair(tmp, ftmp));
8347	/// ...
8348	/// }
8349	///
8350	/// Although we already have similar splitting in DAG Combine, we duplicate
8351	/// it in CodeGenPrepare to catch the case in which pattern is across
8352	/// multiple BBs. The logic in DAG Combine is kept to catch case generated
8353	/// during code expansion.
8354	static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8355	const TargetLowering &TLI) {
8356	// Handle simple but common cases only.
8357	Type *StoreType = SI.getValueOperand()->getType();
8358
8359	// The code below assumes shifting a value by <number of bits>,
8360	// whereas scalable vectors would have to be shifted by
8361	// <2log(vscale) + number of bits> in order to store the
8362	// low/high parts. Bailing out for now.
8363	if (StoreType->isScalableTy())
8364	return false;
8365
8366	if (!DL.typeSizeEqualsStoreSize(Ty: StoreType) \|\|
8367	DL.getTypeSizeInBits(Ty: StoreType) == `0`)
8368	return false;
8369
8370	unsigned HalfValBitSize = DL.getTypeSizeInBits(Ty: StoreType) / `2`;
8371	Type *SplitStoreType = Type::getIntNTy(C&: SI.getContext(), N: HalfValBitSize);
8372	if (!DL.typeSizeEqualsStoreSize(Ty: SplitStoreType))
8373	return false;
8374
8375	// Don't split the store if it is volatile.
8376	if (SI.isVolatile())
8377	return false;
8378
8379	// Match the following patterns:
8380	// (store (or (zext LValue to i64),
8381	// (shl (zext HValue to i64), 32)), HalfValBitSize)
8382	// or
8383	// (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8384	// (zext LValue to i64),
8385	// Expect both operands of OR and the first operand of SHL have only
8386	// one use.
8387	Value LValue, HValue;
8388	if (!match(V: SI.getValueOperand(),
8389	P: m_c_Or(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: LValue))),
8390	R: m_OneUse(SubPattern: m_Shl(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: HValue))),
8391	R: m_SpecificInt(V: HalfValBitSize))))))
8392	return false;
8393
8394	// Check LValue and HValue are int with size less or equal than 32.
8395	if (!LValue->getType()->isIntegerTy() \|\|
8396	DL.getTypeSizeInBits(Ty: LValue->getType()) > HalfValBitSize \|\|
8397	!HValue->getType()->isIntegerTy() \|\|
8398	DL.getTypeSizeInBits(Ty: HValue->getType()) > HalfValBitSize)
8399	return false;
8400
8401	// If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8402	// as the input of target query.
8403	auto *LBC = dyn_cast<BitCastInst>(Val: LValue);
8404	auto *HBC = dyn_cast<BitCastInst>(Val: HValue);
8405	EVT LowTy = LBC ? EVT::getEVT(Ty: LBC->getOperand(i_nocapture: `0`)->getType())
8406	: EVT::getEVT(Ty: LValue->getType());
8407	EVT HighTy = HBC ? EVT::getEVT(Ty: HBC->getOperand(i_nocapture: `0`)->getType())
8408	: EVT::getEVT(Ty: HValue->getType());
8409	if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LTy: LowTy, HTy: HighTy))
8410	return false;
8411
8412	// Start to split store.
8413	IRBuilder<> Builder(SI.getContext());
8414	Builder.SetInsertPoint(&SI);
8415
8416	// If LValue/HValue is a bitcast in another BB, create a new one in current
8417	// BB so it may be merged with the splitted stores by dag combiner.
8418	if (LBC && LBC->getParent() != SI.getParent())
8419	LValue = Builder.CreateBitCast(V: LBC->getOperand(i_nocapture: `0`), DestTy: LBC->getType());
8420	if (HBC && HBC->getParent() != SI.getParent())
8421	HValue = Builder.CreateBitCast(V: HBC->getOperand(i_nocapture: `0`), DestTy: HBC->getType());
8422
8423	bool IsLE = SI.getDataLayout().isLittleEndian();
8424	auto CreateSplitStore = [&](Value V, bool* Upper) {
8425	V = Builder.CreateZExtOrBitCast(V, DestTy: SplitStoreType);
8426	Value *Addr = SI.getPointerOperand();
8427	Align Alignment = SI.getAlign();
8428	const bool IsOffsetStore = (IsLE && Upper) \|\| (!IsLE && !Upper);
8429	if (IsOffsetStore) {
8430	Addr = Builder.CreateGEP(
8431	Ty: SplitStoreType, Ptr: Addr,
8432	IdxList: ConstantInt::get(Ty: Type::getInt32Ty(C&: SI.getContext()), V: `1`));
8433
8434	// When splitting the store in half, naturally one half will retain the
8435	// alignment of the original wider store, regardless of whether it was
8436	// over-aligned or not, while the other will require adjustment.
8437	Alignment = commonAlignment(A: Alignment, Offset: HalfValBitSize / `8`);
8438	}
8439	Builder.CreateAlignedStore(Val: V, Ptr: Addr, Align: Alignment);
8440	};
8441
8442	CreateSplitStore (LValue, false);
8443	CreateSplitStore (HValue, true);
8444
8445	// Delete the old store.
8446	SI.eraseFromParent();
8447	return true;
8448	}
8449
8450	// Return true if the GEP has two operands, the first operand is of a sequential
8451	// type, and the second operand is a constant.
8452	static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8453	gep_type_iterator I = gep_type_begin(GEP: *GEP);
8454	return GEP->getNumOperands() == `2` && I.isSequential() &&
8455	isa<ConstantInt>(Val: GEP->getOperand(i_nocapture: `1`));
8456	}
8457
8458	// Try unmerging GEPs to reduce liveness interference (register pressure) across
8459	// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8460	// reducing liveness interference across those edges benefits global register
8461	// allocation. Currently handles only certain cases.
8462	//
8463	// For example, unmerge %GEPI and %UGEPI as below.
8464	//
8465	// ---------- BEFORE ----------
8466	// SrcBlock:
8467	// ...
8468	// %GEPIOp = ...
8469	// ...
8470	// %GEPI = gep %GEPIOp, Idx
8471	// ...
8472	// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
//    (* %GEPI is alive on the indirectbr edges due to other uses ahead)
//    (* %GEPIOp is alive on the indirectbr edges only because it is used by
//    %UGEPI)
8476	//
8477	// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8478	// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8479	// ...
8480	//
8481	// DstBi:
8482	// ...
8483	// %UGEPI = gep %GEPIOp, UIdx
8484	// ...
8485	// ---------------------------
8486	//
8487	// ---------- AFTER ----------
8488	// SrcBlock:
8489	// ... (same as above)
//    (* %GEPI is still alive on the indirectbr edges)
//    (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
//    unmerging)
8493	// ...
8494	//
8495	// DstBi:
8496	// ...
8497	// %UGEPI = gep %GEPI, (UIdx-Idx)
8498	// ...
8499	// ---------------------------
8500	//
8501	// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8502	// no longer alive on them.
8503	//
// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
// not to disable further simplifications and optimizations as a result of GEP
// merging.
8508	//
8509	// Note this unmerging may increase the length of the data flow critical path
8510	// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8511	// between the register pressure and the length of data-flow critical
8512	// path. Restricting this to the uncommon IndirectBr case would minimize the
8513	// impact of potentially longer critical path, if any, and the impact on compile
8514	// time.
8515	static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8516	const TargetTransformInfo *TTI) {
8517	BasicBlock *SrcBlock = GEPI->getParent();
8518	// Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8519	// (non-IndirectBr) cases exit early here.
8520	if (!isa<IndirectBrInst>(Val: SrcBlock->getTerminator()))
8521	return false;
8522	// Check that GEPI is a simple gep with a single constant index.
8523	if (!GEPSequentialConstIndexed(GEP: GEPI))
8524	return false;
8525	ConstantInt *GEPIIdx = cast<ConstantInt>(Val: GEPI->getOperand(i_nocapture: `1`));
8526	// Check that GEPI is a cheap one.
8527	if (TTI->getIntImmCost(Imm: GEPIIdx->getValue(), Ty: GEPIIdx->getType(),
8528	CostKind: TargetTransformInfo::TCK_SizeAndLatency) >
8529	TargetTransformInfo::TCC_Basic)
8530	return false;
8531	Value *GEPIOp = GEPI->getOperand(i_nocapture: `0`);
8532	// Check that GEPIOp is an instruction that's also defined in SrcBlock.
8533	if (!isa<Instruction>(Val: GEPIOp))
8534	return false;
8535	auto *GEPIOpI = cast<Instruction>(Val: GEPIOp);
8536	if (GEPIOpI->getParent() != SrcBlock)
8537	return false;
8538	// Check that GEP is used outside the block, meaning it's alive on the
8539	// IndirectBr edge(s).
8540	if (llvm::none_of(Range: GEPI->users(), P: [&](User *Usr) {
8541	if (auto *I = dyn_cast<Instruction>(Val: Usr)) {
8542	if (I->getParent() != SrcBlock) {
8543	return true;
8544	}
8545	}
8546	return false;
8547	}))
8548	return false;
8549	// The second elements of the GEP chains to be unmerged.
8550	std::vector<GetElementPtrInst *> UGEPIs;
8551	// Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8552	// on IndirectBr edges.
8553	for (User *Usr : GEPIOp->users()) {
8554	if (Usr == GEPI)
8555	continue;
8556	// Check if Usr is an Instruction. If not, give up.
8557	if (!isa<Instruction>(Val: Usr))
8558	return false;
8559	auto *UI = cast<Instruction>(Val: Usr);
8560	// Check if Usr in the same block as GEPIOp, which is fine, skip.
8561	if (UI->getParent() == SrcBlock)
8562	continue;
8563	// Check if Usr is a GEP. If not, give up.
8564	if (!isa<GetElementPtrInst>(Val: Usr))
8565	return false;
8566	auto *UGEPI = cast<GetElementPtrInst>(Val: Usr);
8567	// Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8568	// the pointer operand to it. If so, record it in the vector. If not, give
8569	// up.
8570	if (!GEPSequentialConstIndexed(GEP: UGEPI))
8571	return false;
8572	if (UGEPI->getOperand(i_nocapture: `0`) != GEPIOp)
8573	return false;
8574	if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8575	return false;
8576	if (GEPIIdx->getType() !=
8577	cast<ConstantInt>(Val: UGEPI->getOperand(i_nocapture: `1`))->getType())
8578	return false;
8579	ConstantInt *UGEPIIdx = cast<ConstantInt>(Val: UGEPI->getOperand(i_nocapture: `1`));
8580	if (TTI->getIntImmCost(Imm: UGEPIIdx->getValue(), Ty: UGEPIIdx->getType(),
8581	CostKind: TargetTransformInfo::TCK_SizeAndLatency) >
8582	TargetTransformInfo::TCC_Basic)
8583	return false;
8584	UGEPIs.push_back(x: UGEPI);
8585	}
8586	if (UGEPIs.size() == `0`)
8587	return false;
8588	// Check the materializing cost of (Uidx-Idx).
8589	for (GetElementPtrInst *UGEPI : UGEPIs) {
8590	ConstantInt *UGEPIIdx = cast<ConstantInt>(Val: UGEPI->getOperand(i_nocapture: `1`));
8591	APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8592	InstructionCost ImmCost = TTI->getIntImmCost(
8593	Imm: NewIdx, Ty: GEPIIdx->getType(), CostKind: TargetTransformInfo::TCK_SizeAndLatency);
8594	if (ImmCost > TargetTransformInfo::TCC_Basic)
8595	return false;
8596	}
8597	// Now unmerge between GEPI and UGEPIs.
8598	for (GetElementPtrInst *UGEPI : UGEPIs) {
8599	UGEPI->setOperand(i_nocapture: `0`, Val_nocapture: GEPI);
8600	ConstantInt *UGEPIIdx = cast<ConstantInt>(Val: UGEPI->getOperand(i_nocapture: `1`));
8601	Constant *NewUGEPIIdx = ConstantInt::get(
8602	Ty: GEPIIdx->getType(), V: UGEPIIdx->getValue() - GEPIIdx->getValue());
8603	UGEPI->setOperand(i_nocapture: `1`, Val_nocapture: NewUGEPIIdx);
8604	// If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8605	// inbounds to avoid UB.
8606	if (!GEPI->isInBounds()) {
8607	UGEPI->setIsInBounds(false);
8608	}
8609	}
8610	// After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8611	// alive on IndirectBr edges).
8612	assert(llvm::none_of(GEPIOp->users(),
8613	[&](User *Usr) {
8614	return cast<Instruction>(Usr)->getParent() != SrcBlock;
8615	}) &&
8616	"GEPIOp is used outside SrcBlock");
8617	return true;
8618	}
8619
8620	static bool optimizeBranch(BranchInst Branch, const* TargetLowering &TLI,
8621	SmallSet<BasicBlock *, `32`> &FreshBBs,
8622	bool IsHugeFunc) {
8623	// Try and convert
8624	// %c = icmp ult %x, 8
8625	// br %c, bla, blb
8626	// %tc = lshr %x, 3
8627	// to
8628	// %tc = lshr %x, 3
8629	// %c = icmp eq %tc, 0
8630	// br %c, bla, blb
8631	// Creating the cmp to zero can be better for the backend, especially if the
8632	// lshr produces flags that can be used automatically.
8633	if (!TLI.preferZeroCompareBranch() \|\| !Branch->isConditional())
8634	return false;
8635
8636	ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: Branch->getCondition());
8637	if (!Cmp \|\| !isa<ConstantInt>(Val: Cmp->getOperand(i_nocapture: `1`)) \|\| !Cmp->hasOneUse())
8638	return false;
8639
8640	Value *X = Cmp->getOperand(i_nocapture: `0`);
8641	if (!X->hasUseList())
8642	return false;
8643
8644	APInt CmpC = cast<ConstantInt>(Val: Cmp->getOperand(i_nocapture: `1`))->getValue();
8645
8646	for (auto *U : X->users()) {
8647	Instruction *UI = dyn_cast<Instruction>(Val: U);
8648	// A quick dominance check
8649	if (!UI \|\|
8650	(UI->getParent() != Branch->getParent() &&
8651	UI->getParent() != Branch->getSuccessor(i: `0`) &&
8652	UI->getParent() != Branch->getSuccessor(i: `1`)) \|\|
8653	(UI->getParent() != Branch->getParent() &&
8654	!UI->getParent()->getSinglePredecessor()))
8655	continue;
8656
8657	if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8658	match(V: UI, P: m_Shr(L: m_Specific(V: X), R: m_SpecificInt(V: CmpC.logBase2())))) {
8659	IRBuilder<> Builder(Branch);
8660	if (UI->getParent() != Branch->getParent())
8661	UI->moveBefore(InsertPos: Branch->getIterator());
8662	UI->dropPoisonGeneratingFlags();
8663	Value *NewCmp = Builder.CreateCmp(Pred: ICmpInst::ICMP_EQ, LHS: UI,
8664	RHS: ConstantInt::get(Ty: UI->getType(), V: `0`));
8665	LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8666	LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8667	replaceAllUsesWith(Old: Cmp, New: NewCmp, FreshBBs, IsHuge: IsHugeFunc);
8668	return true;
8669	}
8670	if (Cmp->isEquality() &&
8671	(match(V: UI, P: m_Add(L: m_Specific(V: X), R: m_SpecificInt(V: -CmpC))) \|\|
8672	match(V: UI, P: m_Sub(L: m_Specific(V: X), R: m_SpecificInt(V: CmpC))) \|\|
8673	match(V: UI, P: m_Xor(L: m_Specific(V: X), R: m_SpecificInt(V: CmpC))))) {
8674	IRBuilder<> Builder(Branch);
8675	if (UI->getParent() != Branch->getParent())
8676	UI->moveBefore(InsertPos: Branch->getIterator());
8677	UI->dropPoisonGeneratingFlags();
8678	Value *NewCmp = Builder.CreateCmp(Pred: Cmp->getPredicate(), LHS: UI,
8679	RHS: ConstantInt::get(Ty: UI->getType(), V: `0`));
8680	LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8681	LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8682	replaceAllUsesWith(Old: Cmp, New: NewCmp, FreshBBs, IsHuge: IsHugeFunc);
8683	return true;
8684	}
8685	}
8686	return false;
8687	}
8688
8689	bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8690	bool AnyChange = false;
8691	AnyChange = fixupDbgVariableRecordsOnInst(I&: *I);
8692
8693	// Bail out if we inserted the instruction to prevent optimizations from
8694	// stepping on each other's toes.
8695	if (InsertedInsts.count(Ptr: I))
8696	return AnyChange;
8697
8698	// TODO: Move into the switch on opcode below here.
8699	if (PHINode *P = dyn_cast<PHINode>(Val: I)) {
8700	// It is possible for very late stage optimizations (such as SimplifyCFG)
8701	// to introduce PHI nodes too late to be cleaned up. If we detect such a
8702	// trivial PHI, go ahead and zap it here.
8703	if (Value V = simplifyInstruction(I: P, Q: {DL, TLInfo})) {
8704	LargeOffsetGEPMap.erase(Key: P);
8705	replaceAllUsesWith(Old: P, New: V, FreshBBs, IsHuge: IsHugeFunc);
8706	P->eraseFromParent();
8707	++NumPHIsElim;
8708	return true;
8709	}
8710	return AnyChange;
8711	}
8712
8713	if (CastInst *CI = dyn_cast<CastInst>(Val: I)) {
8714	// If the source of the cast is a constant, then this should have
8715	// already been constant folded. The only reason NOT to constant fold
8716	// it is if something (e.g. LSR) was careful to place the constant
8717	// evaluation in a block other than then one that uses it (e.g. to hoist
8718	// the address of globals out of a loop). If this is the case, we don't
8719	// want to forward-subst the cast.
8720	if (isa<Constant>(Val: CI->getOperand(i_nocapture: `0`)))
8721	return AnyChange;
8722
8723	if (OptimizeNoopCopyExpression(CI, TLI: TLI, DL: DL))
8724	return true;
8725
8726	if ((isa<UIToFPInst>(Val: I) \|\| isa<SIToFPInst>(Val: I) \|\| isa<FPToUIInst>(Val: I) \|\|
8727	isa<TruncInst>(Val: I)) &&
8728	TLI->optimizeExtendOrTruncateConversion(
8729	I, L: LI->getLoopFor(BB: I->getParent()), TTI: *TTI))
8730	return true;
8731
8732	if (isa<ZExtInst>(Val: I) \|\| isa<SExtInst>(Val: I)) {
8733	/// Sink a zext or sext into its user blocks if the target type doesn't
8734	/// fit in one register
8735	if (TLI->getTypeAction(Context&: CI->getContext(),
8736	VT: TLI->getValueType(DL: *DL, Ty: CI->getType())) ==
8737	TargetLowering::TypeExpandInteger) {
8738	return SinkCast(CI);
8739	} else {
8740	if (TLI->optimizeExtendOrTruncateConversion(
8741	I, L: LI->getLoopFor(BB: I->getParent()), TTI: *TTI))
8742	return true;
8743
8744	bool MadeChange = optimizeExt(Inst&: I);
8745	return MadeChange \| optimizeExtUses(I);
8746	}
8747	}
8748	return AnyChange;
8749	}
8750
8751	if (auto *Cmp = dyn_cast<CmpInst>(Val: I))
8752	if (optimizeCmp(Cmp, ModifiedDT))
8753	return true;
8754
8755	if (match(V: I, P: m_URem(L: m_Value(), R: m_Value())))
8756	if (optimizeURem(Rem: I))
8757	return true;
8758
8759	if (LoadInst *LI = dyn_cast<LoadInst>(Val: I)) {
8760	LI->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: nullptr);
8761	bool Modified = optimizeLoadExt(Load: LI);
8762	unsigned AS = LI->getPointerAddressSpace();
8763	Modified \|= optimizeMemoryInst(MemoryInst: I, Addr: I->getOperand(i: `0`), AccessTy: LI->getType(), AddrSpace: AS);
8764	return Modified;
8765	}
8766
8767	if (StoreInst *SI = dyn_cast<StoreInst>(Val: I)) {
8768	if (splitMergedValStore(SI&: SI, DL: DL, TLI: *TLI))
8769	return true;
8770	SI->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: nullptr);
8771	unsigned AS = SI->getPointerAddressSpace();
8772	return optimizeMemoryInst(MemoryInst: I, Addr: SI->getOperand(i_nocapture: `1`),
8773	AccessTy: SI->getOperand(i_nocapture: `0`)->getType(), AddrSpace: AS);
8774	}
8775
8776	if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: I)) {
8777	unsigned AS = RMW->getPointerAddressSpace();
8778	return optimizeMemoryInst(MemoryInst: I, Addr: RMW->getPointerOperand(), AccessTy: RMW->getType(), AddrSpace: AS);
8779	}
8780
8781	if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: I)) {
8782	unsigned AS = CmpX->getPointerAddressSpace();
8783	return optimizeMemoryInst(MemoryInst: I, Addr: CmpX->getPointerOperand(),
8784	AccessTy: CmpX->getCompareOperand()->getType(), AddrSpace: AS);
8785	}
8786
8787	BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: I);
8788
8789	if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8790	sinkAndCmp0Expression(AndI: BinOp, TLI: *TLI, InsertedInsts))
8791	return true;
8792
8793	// TODO: Move this into the switch on opcode - it handles shifts already.
8794	if (BinOp && (BinOp->getOpcode() == Instruction::AShr \|\|
8795	BinOp->getOpcode() == Instruction::LShr)) {
8796	ConstantInt *CI = dyn_cast<ConstantInt>(Val: BinOp->getOperand(i_nocapture: `1`));
8797	if (CI && TLI->hasExtractBitsInsn())
8798	if (OptimizeExtractBits(ShiftI: BinOp, CI, TLI: TLI, DL: DL))
8799	return true;
8800	}
8801
8802	if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Val: I)) {
8803	if (GEPI->hasAllZeroIndices()) {
8804	/// The GEP operand must be a pointer, so must its result -> BitCast
8805	Instruction NC = new* BitCastInst (GEPI->getOperand(i_nocapture: `0`), GEPI->getType(),
8806	GEPI->getName(), GEPI->getIterator());
8807	NC->setDebugLoc(GEPI->getDebugLoc());
8808	replaceAllUsesWith(Old: GEPI, New: NC, FreshBBs, IsHuge: IsHugeFunc);
8809	RecursivelyDeleteTriviallyDeadInstructions(
8810	V: GEPI, TLI: TLInfo, MSSAU: nullptr,
8811	AboutToDeleteCallback: [&](Value *V) { removeAllAssertingVHReferences(V); });
8812	++NumGEPsElim;
8813	optimizeInst(I: NC, ModifiedDT);
8814	return true;
8815	}
8816	if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8817	return true;
8818	}
8819	}
8820
8821	if (FreezeInst *FI = dyn_cast<FreezeInst>(Val: I)) {
8822	// freeze(icmp a, const)) -> icmp (freeze a), const
8823	// This helps generate efficient conditional jumps.
8824	Instruction CmpI = nullptr*;
8825	if (ICmpInst *II = dyn_cast<ICmpInst>(Val: FI->getOperand(i_nocapture: `0`)))
8826	CmpI = II;
8827	else if (FCmpInst *F = dyn_cast<FCmpInst>(Val: FI->getOperand(i_nocapture: `0`)))
8828	CmpI = F->getFastMathFlags().none() ? F : nullptr;
8829
8830	if (CmpI && CmpI->hasOneUse()) {
8831	auto Op0 = CmpI->getOperand(i: `0`), Op1 = CmpI->getOperand(i: `1`);
8832	bool Const0 = isa<ConstantInt>(Val: Op0) \|\| isa<ConstantFP>(Val: Op0) \|\|
8833	isa<ConstantPointerNull>(Val: Op0);
8834	bool Const1 = isa<ConstantInt>(Val: Op1) \|\| isa<ConstantFP>(Val: Op1) \|\|
8835	isa<ConstantPointerNull>(Val: Op1);
8836	if (Const0 \|\| Const1) {
8837	if (!Const0 \|\| !Const1) {
8838	auto F = new* FreezeInst (Const0 ? Op1 : Op0, "", CmpI->getIterator());
8839	F->takeName(V: FI);
8840	CmpI->setOperand(i: Const0 ? `1` : `0`, Val: F);
8841	}
8842	replaceAllUsesWith(Old: FI, New: CmpI, FreshBBs, IsHuge: IsHugeFunc);
8843	FI->eraseFromParent();
8844	return true;
8845	}
8846	}
8847	return AnyChange;
8848	}
8849
8850	if (tryToSinkFreeOperands(I))
8851	return true;
8852
8853	switch (I->getOpcode()) {
8854	case Instruction::Shl:
8855	case Instruction::LShr:
8856	case Instruction::AShr:
8857	return optimizeShiftInst(Shift: cast<BinaryOperator>(Val: I));
8858	case Instruction::Call:
8859	return optimizeCallInst(CI: cast<CallInst>(Val: I), ModifiedDT);
8860	case Instruction::Select:
8861	return optimizeSelectInst(SI: cast<SelectInst>(Val: I));
8862	case Instruction::ShuffleVector:
8863	return optimizeShuffleVectorInst(SVI: cast<ShuffleVectorInst>(Val: I));
8864	case Instruction::Switch:
8865	return optimizeSwitchInst(SI: cast<SwitchInst>(Val: I));
8866	case Instruction::ExtractElement:
8867	return optimizeExtractElementInst(Inst: cast<ExtractElementInst>(Val: I));
8868	case Instruction::Br:
8869	return optimizeBranch(Branch: cast<BranchInst>(Val: I), TLI: *TLI, FreshBBs, IsHugeFunc);
8870	}
8871
8872	return AnyChange;
8873	}
8874
8875	/// Given an OR instruction, check to see if this is a bitreverse
8876	/// idiom. If so, insert the new intrinsic and return true.
8877	bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8878	if (!I.getType()->isIntegerTy() \|\|
8879	!TLI->isOperationLegalOrCustom(Op: ISD::BITREVERSE,
8880	VT: TLI->getValueType(DL: DL, Ty: I.getType(), AllowUnknown: true*)))
8881	return false;
8882
8883	SmallVector<Instruction *, `4`> Insts;
8884	if (!recognizeBSwapOrBitReverseIdiom(I: &I, MatchBSwaps: false, MatchBitReversals: true, InsertedInsts&: Insts))
8885	return false;
8886	Instruction *LastInst = Insts.back();
8887	replaceAllUsesWith(Old: &I, New: LastInst, FreshBBs, IsHuge: IsHugeFunc);
8888	RecursivelyDeleteTriviallyDeadInstructions(
8889	V: &I, TLI: TLInfo, MSSAU: nullptr,
8890	AboutToDeleteCallback: [&](Value *V) { removeAllAssertingVHReferences(V); });
8891	return true;
8892	}
8893
8894	// In this pass we look for GEP and cast instructions that are used
8895	// across basic blocks and rewrite them to improve basic-block-at-a-time
8896	// selection.
8897	bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8898	SunkAddrs.clear();
8899	bool MadeChange = false;
8900
8901	do {
8902	CurInstIterator = BB.begin();
8903	ModifiedDT = ModifyDT::NotModifyDT;
8904	while (CurInstIterator != BB.end()) {
8905	MadeChange \|= optimizeInst(I: &*CurInstIterator ++, ModifiedDT);
8906	if (ModifiedDT != ModifyDT::NotModifyDT) {
8907	// For huge function we tend to quickly go though the inner optmization
8908	// opportunities in the BB. So we go back to the BB head to re-optimize
8909	// each instruction instead of go back to the function head.
8910	if (IsHugeFunc) {
8911	DT.reset();
8912	getDT(F&: *BB.getParent());
8913	break;
8914	} else {
8915	return true;
8916	}
8917	}
8918	}
8919	} while (ModifiedDT == ModifyDT::ModifyInstDT);
8920
8921	bool MadeBitReverse = true;
8922	while (MadeBitReverse) {
8923	MadeBitReverse = false;
8924	for (auto &I : reverse(C&: BB)) {
8925	if (makeBitReverse(I)) {
8926	MadeBitReverse = MadeChange = true;
8927	break;
8928	}
8929	}
8930	}
8931	MadeChange \|= dupRetToEnableTailCallOpts(BB: &BB, ModifiedDT);
8932
8933	return MadeChange;
8934	}
8935
8936	// Some CGP optimizations may move or alter what's computed in a block. Check
8937	// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8938	bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8939	assert(isa<DbgValueInst>(I));
8940	DbgValueInst &DVI = *cast<DbgValueInst>(Val: I);
8941
8942	// Does this dbg.value refer to a sunk address calculation?
8943	bool AnyChange = false;
8944	SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8945	DVI.location_ops().end());
8946	for (Value *Location : LocationOps) {
8947	WeakTrackingVH SunkAddrVH = SunkAddrs [Location];
8948	Value SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr*;
8949	if (SunkAddr) {
8950	// Point dbg.value at locally computed address, which should give the best
8951	// opportunity to be accurately lowered. This update may change the type
8952	// of pointer being referred to; however this makes no difference to
8953	// debugging information, and we can't generate bitcasts that may affect
8954	// codegen.
8955	DVI.replaceVariableLocationOp(OldValue: Location, NewValue: SunkAddr);
8956	AnyChange = true;
8957	}
8958	}
8959	return AnyChange;
8960	}
8961
8962	bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8963	bool AnyChange = false;
8964	for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange()))
8965	AnyChange \|= fixupDbgVariableRecord(I&: DVR);
8966	return AnyChange;
8967	}
8968
8969	// FIXME: should updating debug-info really cause the "changed" flag to fire,
8970	// which can cause a function to be reprocessed?
8971	bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8972	if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8973	DVR.Type != DbgVariableRecord::LocationType::Assign)
8974	return false;
8975
8976	// Does this DbgVariableRecord refer to a sunk address calculation?
8977	bool AnyChange = false;
8978	SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8979	DVR.location_ops().end());
8980	for (Value *Location : LocationOps) {
8981	WeakTrackingVH SunkAddrVH = SunkAddrs [Location];
8982	Value SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr*;
8983	if (SunkAddr) {
8984	// Point dbg.value at locally computed address, which should give the best
8985	// opportunity to be accurately lowered. This update may change the type
8986	// of pointer being referred to; however this makes no difference to
8987	// debugging information, and we can't generate bitcasts that may affect
8988	// codegen.
8989	DVR.replaceVariableLocationOp(OldValue: Location, NewValue: SunkAddr);
8990	AnyChange = true;
8991	}
8992	}
8993	return AnyChange;
8994	}
8995
8996	static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI) {
8997	DVI->removeFromParent();
8998	if (isa<PHINode>(Val: VI))
8999	DVI->insertBefore(InsertPos: VI ->getParent()->getFirstInsertionPt());
9000	else
9001	DVI->insertAfter(InsertPos: VI);
9002	}
9003
9004	static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
9005	DVR->removeFromParent();
9006	BasicBlock *VIBB = VI ->getParent();
9007	if (isa<PHINode>(Val: VI))
9008	VIBB->insertDbgRecordBefore(DR: DVR, Here: VIBB->getFirstInsertionPt());
9009	else
9010	VIBB->insertDbgRecordAfter(DR: DVR, I: &*VI);
9011	}
9012
9013	// A llvm.dbg.value may be using a value before its definition, due to
9014	// optimizations in this pass and others. Scan for such dbg.values, and rescue
9015	// them by moving the dbg.value to immediately after the value definition.
9016	// FIXME: Ideally this should never be necessary, and this has the potential
9017	// to re-order dbg.value intrinsics.
9018	bool CodeGenPrepare::placeDbgValues(Function &F) {
9019	bool MadeChange = false;
9020	DominatorTree DT(F);
9021
9022	auto DbgProcessor = [&](auto DbgItem, Instruction Position) {
9023	SmallVector<Instruction *, `4`> VIs;
9024	for (Value *V : DbgItem->location_ops())
9025	if (Instruction *VI = dyn_cast_or_null<Instruction>(Val: V))
9026	VIs.push_back(Elt: VI);
9027
9028	// This item may depend on multiple instructions, complicating any
9029	// potential sink. This block takes the defensive approach, opting to
9030	// "undef" the item if it has more than one instruction and any of them do
9031	// not dominate iem.
9032	for (Instruction *VI : VIs) {
9033	if (VI->isTerminator())
9034	continue;
9035
9036	// If VI is a phi in a block with an EHPad terminator, we can't insert
9037	// after it.
9038	if (isa<PHINode>(Val: VI) && VI->getParent()->getTerminator()->isEHPad())
9039	continue;
9040
9041	// If the defining instruction dominates the dbg.value, we do not need
9042	// to move the dbg.value.
9043	if (DT.dominates(Def: VI, User: Position))
9044	continue;
9045
9046	// If we depend on multiple instructions and any of them doesn't
9047	// dominate this DVI, we probably can't salvage it: moving it to
9048	// after any of the instructions could cause us to lose the others.
9049	if (VIs.size() > `1`) {
9050	LLVM_DEBUG(
9051	dbgs()
9052	<< "Unable to find valid location for Debug Value, undefing:\n"
9053	<< *DbgItem);
9054	DbgItem->setKillLocation();
9055	break;
9056	}
9057
9058	LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9059	<< DbgItem << `' '` << VI);
9060	DbgInserterHelper(DbgItem, VI->getIterator());
9061	MadeChange = true;
9062	++NumDbgValueMoved;
9063	}
9064	};
9065
9066	for (BasicBlock &BB : F) {
9067	for (Instruction &Insn : llvm::make_early_inc_range(Range&: BB)) {
9068	// Process dbg.value intrinsics.
9069	DbgValueInst *DVI = dyn_cast<DbgValueInst>(Val: &Insn);
9070	if (DVI) {
9071	DbgProcessor (DVI, DVI);
9072	continue;
9073	}
9074
9075	// If this isn't a dbg.value, process any attached DbgVariableRecord
9076	// records attached to this instruction.
9077	for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9078	Range: filterDbgVars(R: Insn.getDbgRecordRange()))) {
9079	if (DVR.Type != DbgVariableRecord::LocationType::Value)
9080	continue;
9081	DbgProcessor (&DVR, &Insn);
9082	}
9083	}
9084	}
9085
9086	return MadeChange;
9087	}
9088
9089	// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9090	// probes can be chained dependencies of other regular DAG nodes and block DAG
9091	// combine optimizations.
9092	bool CodeGenPrepare::placePseudoProbes(Function &F) {
9093	bool MadeChange = false;
9094	for (auto &Block : F) {
9095	// Move the rest probes to the beginning of the block.
9096	auto FirstInst = Block.getFirstInsertionPt();
9097	while (FirstInst != Block.end() && FirstInst ->isDebugOrPseudoInst())
9098	++FirstInst;
9099	BasicBlock::iterator I(FirstInst);
9100	I ++;
9101	while (I != Block.end()) {
9102	if (auto *II = dyn_cast<PseudoProbeInst>(Val: I ++)) {
9103	II->moveBefore(InsertPos: FirstInst);
9104	MadeChange = true;
9105	}
9106	}
9107	}
9108	return MadeChange;
9109	}
9110
/// Scale down both weights to fit into uint32_t.
///
/// Both weights are divided by the same factor, chosen as
/// floor(max / UINT32_MAX) + 1, so that the larger one ends up no greater
/// than UINT32_MAX. Weights that already fit are left unchanged (factor 1).
/// The factor is kept in 64 bits: for large inputs it exceeds the uint32_t
/// range, and truncating it could yield a too-small or even zero divisor.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  uint64_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
}
9118
9119	/// Some targets prefer to split a conditional branch like:
9120	/// \code
9121	/// %0 = icmp ne i32 %a, 0
9122	/// %1 = icmp ne i32 %b, 0
9123	/// %or.cond = or i1 %0, %1
9124	/// br i1 %or.cond, label %TrueBB, label %FalseBB
9125	/// \endcode
9126	/// into multiple branch instructions like:
9127	/// \code
9128	/// bb1:
9129	/// %0 = icmp ne i32 %a, 0
9130	/// br i1 %0, label %TrueBB, label %bb2
9131	/// bb2:
9132	/// %1 = icmp ne i32 %b, 0
9133	/// br i1 %1, label %TrueBB, label %FalseBB
9134	/// \endcode
9135	/// This usually allows instruction selection to do even further optimizations
9136	/// and combine the compare with the branch instruction. Currently this is
9137	/// applied for targets which have "cheap" jump instructions.
9138	///
9139	/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9140	///
9141	bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9142	if (!TM->Options.EnableFastISel \|\| TLI->isJumpExpensive())
9143	return false;
9144
9145	bool MadeChange = false;
9146	for (auto &BB : F) {
9147	// Does this BB end with the following?
9148	// %cond1 = icmp\|fcmp\|binary instruction ...
9149	// %cond2 = icmp\|fcmp\|binary instruction ...
9150	// %cond.or = or\|and i1 %cond1, cond2
9151	// br i1 %cond.or label %dest1, label %dest2"
9152	Instruction *LogicOp;
9153	BasicBlock TBB, FBB;
9154	if (!match(V: BB.getTerminator(),
9155	P: m_Br(C: m_OneUse(SubPattern: m_Instruction(I&: LogicOp)), T&: TBB, F&: FBB)))
9156	continue;
9157
9158	auto *Br1 = cast<BranchInst>(Val: BB.getTerminator());
9159	if (Br1->getMetadata(KindID: LLVMContext::MD_unpredictable))
9160	continue;
9161
9162	// The merging of mostly empty BB can cause a degenerate branch.
9163	if (TBB == FBB)
9164	continue;
9165
9166	unsigned Opc;
9167	Value Cond1, Cond2;
9168	if (match(V: LogicOp,
9169	P: m_LogicalAnd(L: m_OneUse(SubPattern: m_Value(V&: Cond1)), R: m_OneUse(SubPattern: m_Value(V&: Cond2)))))
9170	Opc = Instruction::And;
9171	else if (match(V: LogicOp, P: m_LogicalOr(L: m_OneUse(SubPattern: m_Value(V&: Cond1)),
9172	R: m_OneUse(SubPattern: m_Value(V&: Cond2)))))
9173	Opc = Instruction::Or;
9174	else
9175	continue;
9176
9177	auto IsGoodCond = [](Value *Cond) {
9178	return match(
9179	V: Cond,
9180	P: m_CombineOr(L: m_Cmp(), R: m_CombineOr(L: m_LogicalAnd(L: m_Value(), R: m_Value()),
9181	R: m_LogicalOr(L: m_Value(), R: m_Value()))));
9182	};
9183	if (!IsGoodCond (Cond1) \|\| !IsGoodCond (Cond2))
9184	continue;
9185
9186	LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9187
9188	// Create a new BB.
9189	auto *TmpBB =
9190	BasicBlock::Create(Context&: BB.getContext(), Name: BB.getName() + ".cond.split",
9191	Parent: BB.getParent(), InsertBefore: BB.getNextNode());
9192	if (IsHugeFunc)
9193	FreshBBs.insert(Ptr: TmpBB);
9194
9195	// Update original basic block by using the first condition directly by the
9196	// branch instruction and removing the no longer needed and/or instruction.
9197	Br1->setCondition(Cond1);
9198	LogicOp->eraseFromParent();
9199
9200	// Depending on the condition we have to either replace the true or the
9201	// false successor of the original branch instruction.
9202	if (Opc == Instruction::And)
9203	Br1->setSuccessor(idx: `0`, NewSucc: TmpBB);
9204	else
9205	Br1->setSuccessor(idx: `1`, NewSucc: TmpBB);
9206
9207	// Fill in the new basic block.
9208	auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond: Cond2, True: TBB, False: FBB);
9209	if (auto *I = dyn_cast<Instruction>(Val: Cond2)) {
9210	I->removeFromParent();
9211	I->insertBefore(InsertPos: Br2->getIterator());
9212	}
9213
9214	// Update PHI nodes in both successors. The original BB needs to be
9215	// replaced in one successor's PHI nodes, because the branch comes now from
9216	// the newly generated BB (NewBB). In the other successor we need to add one
9217	// incoming edge to the PHI nodes, because both branch instructions target
9218	// now the same successor. Depending on the original branch condition
9219	// (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9220	// we perform the correct update for the PHI nodes.
9221	// This doesn't change the successor order of the just created branch
9222	// instruction (or any other instruction).
9223	if (Opc == Instruction::Or)
9224	std::swap(a&: TBB, b&: FBB);
9225
9226	// Replace the old BB with the new BB.
9227	TBB->replacePhiUsesWith(Old: &BB, New: TmpBB);
9228
9229	// Add another incoming edge from the new BB.
9230	for (PHINode &PN : FBB->phis()) {
9231	auto *Val = PN.getIncomingValueForBlock(BB: &BB);
9232	PN.addIncoming(V: Val, BB: TmpBB);
9233	}
9234
9235	// Update the branch weights (from SelectionDAGBuilder::
9236	// FindMergedConditions).
9237	if (Opc == Instruction::Or) {
9238	// Codegen X \| Y as:
9239	// BB1:
9240	// jmp_if_X TBB
9241	// jmp TmpBB
9242	// TmpBB:
9243	// jmp_if_Y TBB
9244	// jmp FBB
9245	//
9246
9247	// We have flexibility in setting Prob for BB1 and Prob for NewBB.
9248	// The requirement is that
9249	// TrueProb for BB1 + (FalseProb for BB1 TrueProb for TmpBB)*
9250	// = TrueProb for original BB.
9251	// Assuming the original weights are A and B, one choice is to set BB1's
9252	// weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9253	// assumes that
9254	// TrueProb for BB1 == FalseProb for BB1 TrueProb for TmpBB.*
9255	// Another choice is to assume TrueProb for BB1 equals to TrueProb for
9256	// TmpBB, but the math is more complicated.
9257	uint64_t TrueWeight, FalseWeight;
9258	if (extractBranchWeights(I: *Br1, TrueVal&: TrueWeight, FalseVal&: FalseWeight)) {
9259	uint64_t NewTrueWeight = TrueWeight;
9260	uint64_t NewFalseWeight = TrueWeight + `2` * FalseWeight;
9261	scaleWeights(NewTrue&: NewTrueWeight, NewFalse&: NewFalseWeight);
9262	Br1->setMetadata(KindID: LLVMContext::MD_prof,
9263	Node: MDBuilder (Br1->getContext())
9264	.createBranchWeights(TrueWeight, FalseWeight,
9265	IsExpected: hasBranchWeightOrigin(I: *Br1)));
9266
9267	NewTrueWeight = TrueWeight;
9268	NewFalseWeight = `2` * FalseWeight;
9269	scaleWeights(NewTrue&: NewTrueWeight, NewFalse&: NewFalseWeight);
9270	Br2->setMetadata(KindID: LLVMContext::MD_prof,
9271	Node: MDBuilder (Br2->getContext())
9272	.createBranchWeights(TrueWeight, FalseWeight));
9273	}
9274	} else {
9275	// Codegen X & Y as:
9276	// BB1:
9277	// jmp_if_X TmpBB
9278	// jmp FBB
9279	// TmpBB:
9280	// jmp_if_Y TBB
9281	// jmp FBB
9282	//
9283	// This requires creation of TmpBB after CurBB.
9284
9285	// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9286	// The requirement is that
9287	// FalseProb for BB1 + (TrueProb for BB1 FalseProb for TmpBB)*
9288	// = FalseProb for original BB.
9289	// Assuming the original weights are A and B, one choice is to set BB1's
9290	// weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9291	// assumes that
9292	// FalseProb for BB1 == TrueProb for BB1 FalseProb for TmpBB.*
9293	uint64_t TrueWeight, FalseWeight;
9294	if (extractBranchWeights(I: *Br1, TrueVal&: TrueWeight, FalseVal&: FalseWeight)) {
9295	uint64_t NewTrueWeight = `2` * TrueWeight + FalseWeight;
9296	uint64_t NewFalseWeight = FalseWeight;
9297	scaleWeights(NewTrue&: NewTrueWeight, NewFalse&: NewFalseWeight);
9298	Br1->setMetadata(KindID: LLVMContext::MD_prof,
9299	Node: MDBuilder (Br1->getContext())
9300	.createBranchWeights(TrueWeight, FalseWeight));
9301
9302	NewTrueWeight = `2` * TrueWeight;
9303	NewFalseWeight = FalseWeight;
9304	scaleWeights(NewTrue&: NewTrueWeight, NewFalse&: NewFalseWeight);
9305	Br2->setMetadata(KindID: LLVMContext::MD_prof,
9306	Node: MDBuilder (Br2->getContext())
9307	.createBranchWeights(TrueWeight, FalseWeight));
9308	}
9309	}
9310
9311	ModifiedDT = ModifyDT::ModifyBBDT;
9312	MadeChange = true;
9313
9314	LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9315	TmpBB->dump());
9316	}
9317	return MadeChange;
9318	}
9319

Provided by KDAB

Definitions

NumBlocksElim
NumPHIsElim
NumGEPsElim
NumCmpUses
NumCastUses
NumMemoryInsts
NumMemoryInstsPhiCreated
NumMemoryInstsSelectCreated
NumExtsMoved
NumExtUses
NumAndsAdded
NumAndUses
NumRetsDup
NumDbgValueMoved
NumSelectsExpanded
NumStoreExtractExposed
DisableBranchOpts
DisableGCOpts
DisableSelectToBranch
AddrSinkUsingGEPs
EnableAndCmpSinking
DisableStoreExtract
StressStoreExtract
DisableExtLdPromotion
StressExtLdPromotion
DisablePreheaderProtect
ProfileGuidedSectionPrefix
ProfileUnknownInSpecialSection
BBSectionsGuidedSectionPrefix
FreqRatioToSkipMerge
ForceSplitStore
EnableTypePromotionMerge
DisableComplexAddrModes
AddrSinkNewPhis
AddrSinkNewSelects
AddrSinkCombineBaseReg
AddrSinkCombineBaseGV
AddrSinkCombineBaseOffs
AddrSinkCombineScaledReg
EnableGEPOffsetSplit
EnableICMP_EQToICMP_ST
VerifyBFIUpdates
OptimizePhiTypes
HugeFuncThresholdInCGPP
MaxAddressUsersToScan
DisableDeletePHIs
ExtType
ModifyDT
CodeGenPrepare
CodeGenPrepare
CodeGenPrepare
releaseMemory
resetIteratorIfInvalidatedWhileCalling
getDT
CodeGenPrepareLegacyPass
CodeGenPrepareLegacyPass
getPassName
getAnalysisUsage
ID
runOnFunction
createCodeGenPrepareLegacyPass
run
run
_run
eliminateAssumptions
removeAllAssertingVHReferences
verifyBFIUpdates
eliminateFallThrough
findDestBlockOfMergeableEmptyBlock
eliminateMostlyEmptyBlocks
isMergingEmptyBlockProfitable
canMergeBlocks
replaceAllUsesWith
eliminateMostlyEmptyBlock
computeBaseDerivedRelocateMap
getGEPSmallConstantIntOffsetV
simplifyRelocatesOffABase
simplifyOffsetableRelocate
SinkCast
OptimizeNoopCopyExpression
matchIncrement
getIVIncrement
isIVIncrement
replaceMathCmpWithIntrinsic
matchUAddWithOverflowConstantEdgeCases
combineToUAddWithOverflow
combineToUSubWithOverflow
unfoldPowerOf2Test
sinkCmpExpression
foldICmpWithDominatingICmp
swapICmpOperandsToExposeCSEOpportunities
foldFCmpToFPClassTest
isRemOfLoopIncrementWithLoopInvariant
foldURemOfLoopIncrement
optimizeURem
optimizeCmp
sinkAndCmp0Expression
isExtractBitsCandidateUse
SinkShiftAndTruncate
OptimizeExtractBits
despeculateCountZeros
optimizeCallInst
isIntrinsicOrLFToBeTailCalled
dupRetToEnableTailCallOpts
ExtAddrMode
FieldName
ExtAddrMode
replaceWith
compare
isTrivial
GetFieldAsValue
SetCombinedField
operator<<
print
dump
TypePromotionTransaction
TypePromotionAction
TypePromotionAction
~TypePromotionAction
commit
InsertionHandler
InsertionHandler
insert
InstructionMoveBefore
InstructionMoveBefore
undo
OperandSetter
OperandSetter
undo
OperandsHider
OperandsHider
undo
TruncBuilder
TruncBuilder
getBuiltValue
undo
SExtBuilder
SExtBuilder
getBuiltValue
undo
ZExtBuilder
ZExtBuilder
getBuiltValue
undo
TypeMutator
TypeMutator
undo
UsesReplacer
InstructionAndIdx
InstructionAndIdx
UsesReplacer
undo
InstructionRemover
InstructionRemover
~InstructionRemover
operator=
InstructionRemover
undo
TypePromotionTransaction
setOperand
eraseInstruction
replaceAllUsesWith
mutateType
createTrunc
createSExt
createZExt
getRestorationPoint
commit
rollback
AddressingModeMatcher
AddressingModeMatcher
Match
PhiNodeSetIterator
PhiNodeSet
insert
erase
clear
begin
end
size
count
SkipRemovedElements
PhiNodeSetIterator
operator*
operator++
operator==
operator!=
SimplificationTracker
SimplificationTracker
Get
Simplify
Put
ReplacePhi
newPhiNodes
insertNewPhi
insertNewSelect
countNewPhiNodes
countNewSelectNodes
destroyNewNodes
AddressingModeCombiner
AddressingModeCombiner
~AddressingModeCombiner
getAddrMode
addNewAddrMode
combineAddrModes
eraseCommonValueIfDead
initializeMap
findCommon
MatchPhiNode
MatchPhiSet
FillPlaceholders
InsertPlaceholders
addrModeCombiningAllowed
matchScaledValue
MightBeFoldableInst
isPromotedInstructionLegal
TypePromotionHelper
addPromotedInst
getOrigType
shouldExtOperand
signExtendOperandForOther
zeroExtendOperandForOther
canGetThrough
getAction
promoteOperandForTruncAndAnyExt
promoteOperandForOther
isPromotionProfitable
matchOperationAddr
matchAddr
IsOperandAMemoryOperand
FindAllMemoryUses
FindAllMemoryUses
valueAlreadyLiveAtInst
isProfitableToFoldIntoAddressingMode
IsNonLocalValue
findInsertPos
optimizeMemoryInst
optimizeGatherScatterInst
optimizeInlineAsmInst
hasSameExtUse
tryToPromoteExts
mergeSExts
splitLargeGEPOffsets
optimizePhiType
optimizePhiTypes
canFormExtLd
optimizeExt
performAddressTypePromotion
optimizeExtUses
optimizeLoadExt
sinkSelectOperand
isFormingBranchFromSelectProfitable
getTrueOrFalseValue
optimizeShiftInst
optimizeFunnelShift
optimizeSelectInst
optimizeShuffleVectorInst
tryToSinkFreeOperands
optimizeSwitchType
optimizeSwitchPhiConstants
optimizeSwitchInst
VectorPromoteHelper
getEndOfTransition
getTransitionOriginalValueIdx
getTransitionIdx
getTransitionType
isProfitableToPromote
getConstantVector
canCauseUndefinedBehavior
VectorPromoteHelper
canPromote
shouldPromote
canCombine
enqueueForPromotion
recordCombineInstruction
promote
promoteImpl
optimizeExtractElementInst
splitMergedValStore
GEPSequentialConstIndexed
tryUnmergingGEPsAcrossIndirectBr
optimizeBranch
optimizeInst
makeBitReverse
optimizeBlock
fixupDbgValue
fixupDbgVariableRecordsOnInst
fixupDbgVariableRecord
DbgInserterHelper
DbgInserterHelper
placeDbgValues
placePseudoProbes
scaleWeights

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of llvm/lib/CodeGen/CodeGenPrepare.cpp