1/* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2025 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#define INCLUDE_ALGORITHM
22#define INCLUDE_FUNCTIONAL
23#define INCLUDE_ARRAY
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "backend.h"
28#include "rtl.h"
29#include "rtlanal.h"
30#include "df.h"
31#include "rtl-ssa.h"
32
33#include "predict.h"
34#include "cfgrtl.h"
35#include "cfgcleanup.h"
36#include "cfgloop.h"
37#include "tree-pass.h"
38#include "rtl-iter.h"
39#include "target.h"
40
41/* This pass does simple forward propagation and simplification when an
42 operand of an insn can only come from a single def. This pass uses
43 RTL SSA, so it is global. However, we only do limited analysis of
44 available expressions.
45
46 1) The pass tries to propagate the source of the def into the use,
47 and checks if the result is independent of the substituted value.
48 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
49 zero, independent of the source register.
50
51 In particular, we propagate constants into the use site. Sometimes
52 RTL expansion did not put the constant in the same insn on purpose,
53 to satisfy a predicate, and the result will fail to be recognized;
54 but this happens rarely and in this case we can still create a
55 REG_EQUAL note. For multi-word operations, this
56
57 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
58 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
59 (set (subreg:SI (reg:DI 122) 0)
60 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
61 (set (subreg:SI (reg:DI 122) 4)
62 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
63
64 can be simplified to the much simpler
65
66 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
67 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
68
69 This particular propagation is also effective at putting together
70 complex addressing modes. We are more aggressive inside MEMs, in
71 that all definitions are propagated if the use is in a MEM; if the
72 result is a valid memory address we check address_cost to decide
73 whether the substitution is worthwhile.
74
75 2) The pass propagates register copies. This is not as effective as
76 the copy propagation done by CSE's canon_reg, which works by walking
77 the instruction chain, it can help the other transformations.
78
79 We should consider removing this optimization, and instead reorder the
80 RTL passes, because GCSE does this transformation too. With some luck,
81 the CSE pass at the end of rest_of_handle_gcse could also go away.
82
83 3) The pass looks for paradoxical subregs that are actually unnecessary.
84 Things like this:
85
86 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
87 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
88 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
89 (subreg:SI (reg:QI 121) 0)))
90
91 are very common on machines that can only do word-sized operations.
92 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
93 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
94 we can replace the paradoxical subreg with simply (reg:WIDE M). The
95 above will simplify this to
96
97 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
98 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
99 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
100
101 where the first two insns are now dead. */
102
103using namespace rtl_ssa;
104
105static int num_changes;
106
107/* Do not try to replace constant addresses or addresses of local and
108 argument slots. These MEM expressions are made only once and inserted
109 in many instructions, as well as being used to control symbol table
110 output. It is not safe to clobber them.
111
112 There are some uncommon cases where the address is already in a register
113 for some reason, but we cannot take advantage of that because we have
114 no easy way to unshare the MEM. In addition, looking up all stack
115 addresses is costly. */
116
117static bool
118can_simplify_addr (rtx addr)
119{
120 rtx reg;
121
122 if (CONSTANT_ADDRESS_P (addr))
123 return false;
124
125 if (GET_CODE (addr) == PLUS)
126 reg = XEXP (addr, 0);
127 else
128 reg = addr;
129
130 return (!REG_P (reg)
131 || (REGNO (reg) != FRAME_POINTER_REGNUM
132 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
133 && REGNO (reg) != ARG_POINTER_REGNUM));
134}
135
136/* MEM is the result of an address simplification, and temporarily
137 undoing changes OLD_NUM_CHANGES onwards restores the original address.
138 Return whether it is good to use the new address instead of the
139 old one. INSN is the containing instruction. */
140
141static bool
142should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
143{
144 int gain;
145
146 /* Prefer the new address if it is less expensive. */
147 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
148 {
149 undo_recog_changes undo (old_num_changes);
150 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
151 MEM_ADDR_SPACE (mem), speed);
152 }
153 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
154 MEM_ADDR_SPACE (mem), speed);
155
156 /* If the addresses have equivalent cost, prefer the new address
157 if it has the highest `set_src_cost'. That has the potential of
158 eliminating the most insns without additional costs, and it
159 is the same that cse.cc used to do. */
160 if (gain == 0)
161 {
162 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed);
163 undo_recog_changes undo (old_num_changes);
164 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed_p: speed);
165 }
166
167 return (gain > 0);
168}
169
170
171namespace
172{
173 class fwprop_propagation : public insn_propagation
174 {
175 public:
176 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
177 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
178 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
179
180 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
181
182 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
183 bool folded_to_constants_p () const;
184 bool likely_profitable_p () const;
185
186 bool check_mem (int, rtx) final override;
187 void note_simplification (int, uint16_t, rtx, rtx) final override;
188 uint16_t classify_result (rtx, rtx);
189
190 private:
191 const bool single_use_p;
192 const bool single_ebb_p;
193 };
194}
195
196/* Prepare to replace FROM with TO in USE_INSN. */
197
198fwprop_propagation::fwprop_propagation (insn_info *use_insn,
199 set_info *def, rtx from, rtx to)
200 : insn_propagation (use_insn->rtl (), from, to),
201 single_use_p (def->single_nondebug_use ()),
202 single_ebb_p (use_insn->ebb () == def->ebb ())
203{
204 should_check_mems = true;
205 should_note_simplifications = true;
206}
207
208/* MEM is the result of an address simplification, and temporarily
209 undoing changes OLD_NUM_CHANGES onwards restores the original address.
210 Return true if the propagation should continue, false if it has failed. */
211
212bool
213fwprop_propagation::check_mem (int old_num_changes, rtx mem)
214{
215 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
216 MEM_ADDR_SPACE (mem)))
217 {
218 failure_reason = "would create an invalid MEM";
219 return false;
220 }
221
222 bool can_simplify = [&]()
223 {
224 undo_recog_changes undo (old_num_changes);
225 return can_simplify_addr (XEXP (mem, 0));
226 } ();
227 if (!can_simplify)
228 {
229 failure_reason = "would replace a frame address";
230 return false;
231 }
232
233 /* Copy propagations are always ok. Otherwise check the costs. */
234 if (!(REG_P (from) && REG_P (to))
235 && !should_replace_address (old_num_changes, mem, insn))
236 {
237 failure_reason = "would increase the cost of a MEM";
238 return false;
239 }
240
241 result_flags |= CHANGED_MEM;
242 return true;
243}
244
245/* OLDX has been simplified to NEWX. Describe the change in terms of
246 result_flags. */
247
248uint16_t
249fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
250{
251 if (CONSTANT_P (new_rtx))
252 {
253 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
254 and NEW_RTX is likely not a legitimate address. We want it to
255 disappear if it is invalid.
256
257 ??? Using the mode of the LO_SUM as the mode of the address
258 seems odd, but it was what the pre-SSA code did. */
259 if (GET_CODE (old_rtx) == LO_SUM
260 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
261 return CONSTANT;
262 return CONSTANT | PROFITABLE;
263 }
264
265 /* Allow replacements that simplify operations on a vector or complex
266 value to a component. The most prominent case is
267 (subreg ([vec_]concat ...)). */
268 if (REG_P (new_rtx)
269 && !HARD_REGISTER_P (new_rtx)
270 && (VECTOR_MODE_P (GET_MODE (from))
271 || COMPLEX_MODE_P (GET_MODE (from)))
272 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
273 return PROFITABLE;
274
275 /* Allow (subreg (mem)) -> (mem) simplifications with the following
276 exceptions:
277 1) Propagating (mem)s into multiple uses is not profitable.
278 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
279 runs less frequently.
280 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
281 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
282 ones. */
283 if (single_use_p
284 && single_ebb_p
285 && SUBREG_P (old_rtx)
286 && !paradoxical_subreg_p (x: old_rtx)
287 && MEM_P (new_rtx)
288 && !MEM_VOLATILE_P (new_rtx))
289 return PROFITABLE;
290
291 return 0;
292}
293
294/* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
295 is the number of unrelated changes that had been made before processing
296 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
297 had at that point. */
298
299void
300fwprop_propagation::note_simplification (int old_num_changes,
301 uint16_t old_result_flags,
302 rtx old_rtx, rtx new_rtx)
303{
304 result_flags &= ~(CONSTANT | PROFITABLE);
305 uint16_t new_flags = classify_result (old_rtx, new_rtx);
306 if (old_num_changes)
307 new_flags &= old_result_flags;
308 result_flags |= new_flags;
309}
310
311/* Return true if all substitutions eventually folded to constants. */
312
313bool
314fwprop_propagation::folded_to_constants_p () const
315{
316 /* If we're propagating a HIGH, require it to be folded with a
317 partnering LO_SUM. For example, a REG_EQUAL note with a register
318 replaced by an unfolded HIGH is not useful. */
319 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
320 return true;
321 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
322}
323
324
325/* Return true if it is worth keeping the result of the propagation,
326 false if it would increase the complexity of the pattern too much. */
327
328bool
329fwprop_propagation::likely_profitable_p () const
330{
331 if (changed_mem_p ())
332 return true;
333
334 if (!(result_flags & UNSIMPLIFIED)
335 && (result_flags & PROFITABLE))
336 return true;
337
338 if (REG_P (to))
339 return true;
340
341 if (GET_CODE (to) == SUBREG
342 && REG_P (SUBREG_REG (to))
343 && !paradoxical_subreg_p (x: to))
344 return true;
345
346 if (CONSTANT_P (to))
347 return true;
348
349 return false;
350}
351
352/* Check that X has a single def. */
353
354static bool
355reg_single_def_p (rtx x)
356{
357 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
358}
359
360/* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
361 USE_INSN. Return the number of substitutions on success, otherwise return
362 -1 and leave USE_INSN unchanged.
363
364 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
365 to fold to a constant, so that the note does not use any more registers
366 than it did previously. If REQUIRE_CONSTANT is false, also allow the
367 substitution if it's something we'd normally allow for the main
368 instruction pattern. */
369
370static int
371try_fwprop_subst_note (insn_info *use_insn, set_info *def,
372 rtx note, rtx dest, rtx src, bool require_constant)
373{
374 rtx_insn *use_rtl = use_insn->rtl ();
375 insn_info *def_insn = def->insn ();
376
377 insn_change_watermark watermark;
378 fwprop_propagation prop (use_insn, def, dest, src);
379 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
380 {
381 if (dump_file && (dump_flags & TDF_DETAILS))
382 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
383 " notes of insn %d: %s\n", def_insn->uid (),
384 use_insn->uid (), prop.failure_reason);
385 return -1;
386 }
387
388 if (prop.num_replacements == 0)
389 return 0;
390
391 if (require_constant)
392 {
393 if (!prop.folded_to_constants_p ())
394 {
395 if (dump_file && (dump_flags & TDF_DETAILS))
396 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
397 " notes of insn %d: %s\n", def_insn->uid (),
398 use_insn->uid (), "wouldn't fold to constants");
399 return -1;
400 }
401 }
402 else
403 {
404 if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
405 {
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
408 " notes of insn %d: %s\n", def_insn->uid (),
409 use_insn->uid (), "would increase complexity of node");
410 return -1;
411 }
412 }
413
414 if (dump_file && (dump_flags & TDF_DETAILS))
415 {
416 fprintf (stream: dump_file, format: "\nin notes of insn %d, replacing:\n ",
417 INSN_UID (insn: use_rtl));
418 {
419 undo_recog_changes undo (0);
420 print_inline_rtx (dump_file, note, 2);
421 }
422 fprintf (stream: dump_file, format: "\n with:\n ");
423 print_inline_rtx (dump_file, note, 2);
424 fprintf (stream: dump_file, format: "\n");
425 }
426 watermark.keep ();
427 return prop.num_replacements;
428}
429
430/* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
431 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
432 unchanged. */
433
434static bool
435try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
436 set_info *def, rtx *loc, rtx dest, rtx src)
437{
438 insn_info *use_insn = use_change.insn ();
439 rtx_insn *use_rtl = use_insn->rtl ();
440 insn_info *def_insn = def->insn ();
441
442 insn_change_watermark watermark;
443 fwprop_propagation prop (use_insn, def, dest, src);
444 if (!prop.apply_to_pattern (loc))
445 {
446 if (dump_file && (dump_flags & TDF_DETAILS))
447 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
448 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
449 prop.failure_reason);
450 return false;
451 }
452
453 if (prop.num_replacements == 0)
454 return false;
455
456 if (!prop.likely_profitable_p ()
457 && (prop.changed_mem_p ()
458 || contains_mem_rtx_p (x: src)
459 || use_insn->is_asm ()
460 || use_insn->is_debug_insn ()))
461 {
462 if (dump_file && (dump_flags & TDF_DETAILS))
463 fprintf (stream: dump_file, format: "cannot propagate from insn %d into"
464 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
465 "would increase complexity of pattern");
466 return false;
467 }
468
469 if (dump_file && (dump_flags & TDF_DETAILS))
470 {
471 fprintf (stream: dump_file, format: "\npropagating insn %d into insn %d, replacing:\n",
472 def_insn->uid (), use_insn->uid ());
473 undo_recog_changes undo (0);
474 print_rtl_single (dump_file, PATTERN (insn: use_rtl));
475 }
476
477 bool ok = recog (watermark&: attempt, change&: use_change);
478 if (ok
479 && !prop.changed_mem_p ()
480 && !use_insn->is_asm ()
481 && !use_insn->is_debug_insn ())
482 {
483 bool strict_p = !prop.likely_profitable_p ();
484 if (!change_is_worthwhile (change&: use_change, strict_p))
485 {
486 if (dump_file)
487 fprintf (stream: dump_file, format: "change not profitable");
488 ok = false;
489 }
490 }
491
492 if (!ok)
493 {
494 /* The pattern didn't match, but if all uses of SRC folded to
495 constants, we can add a REG_EQUAL note for the result, if there
496 isn't one already. */
497 if (!prop.folded_to_constants_p ())
498 return false;
499
500 /* Test this first to avoid creating an unnecessary copy of SRC. */
501 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
502 return false;
503
504 rtx set = set_for_reg_notes (use_rtl);
505 if (!set || !REG_P (SET_DEST (set)))
506 return false;
507
508 rtx value = copy_rtx (SET_SRC (set));
509 cancel_changes (0);
510
511 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
512 because the bits in there can be anything and so might not
513 match the REG_EQUAL note content. See PR70574. */
514 if (contains_paradoxical_subreg_p (SET_SRC (set)))
515 return false;
516
517 if (dump_file && (dump_flags & TDF_DETAILS))
518 fprintf (stream: dump_file, format: " Setting REG_EQUAL note\n");
519
520 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
521 }
522
523 rtx *note_ptr = &REG_NOTES (use_rtl);
524 while (rtx note = *note_ptr)
525 {
526 if ((REG_NOTE_KIND (note) == REG_EQUAL
527 || REG_NOTE_KIND (note) == REG_EQUIV)
528 && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: false) < 0)
529 {
530 *note_ptr = XEXP (note, 1);
531 free_EXPR_LIST_node (note);
532 }
533 else
534 note_ptr = &XEXP (note, 1);
535 }
536
537 confirm_change_group ();
538 crtl->ssa->change_insn (change&: use_change);
539 num_changes++;
540 return true;
541}
542
543/* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
544 given that it was not possible to do this for USE_INSN's main pattern.
545 Return true on success, otherwise leave USE_INSN unchanged. */
546
547static bool
548try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
549 rtx dest, rtx src)
550{
551 rtx_insn *use_rtl = use_insn->rtl ();
552 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
553 if ((REG_NOTE_KIND (note) == REG_EQUAL
554 || REG_NOTE_KIND (note) == REG_EQUIV)
555 && try_fwprop_subst_note (use_insn, def, note, dest, src, require_constant: true) > 0)
556 {
557 confirm_change_group ();
558 return true;
559 }
560
561 return false;
562}
563
564/* Check whether we could validly substitute (set DEST SRC), which defines DEF,
565 into USE. If so, first try performing the substitution in location LOC
566 of USE->insn ()'s pattern. If that fails, try instead to substitute
567 into the notes.
568
569 Return true on success, otherwise leave USE_INSN unchanged. */
570
571static bool
572try_fwprop_subst (use_info *use, set_info *def,
573 rtx *loc, rtx dest, rtx src)
574{
575 insn_info *use_insn = use->insn ();
576 insn_info *def_insn = def->insn ();
577
578 auto attempt = crtl->ssa->new_change_attempt ();
579 use_array src_uses = remove_note_accesses (watermark&: attempt, accesses: def_insn->uses ());
580
581 /* ??? Not really a meaningful test: it means we can propagate arithmetic
582 involving hard registers but not bare references to them. A better
583 test would be to iterate over src_uses looking for hard registers
584 that are not fixed. */
585 if (REG_P (src) && HARD_REGISTER_P (src))
586 return false;
587
588 /* ??? It would be better to make this EBB-based instead. That would
589 involve checking for equal EBBs rather than equal BBs and trying
590 to make the uses available at use_insn->ebb ()->first_bb (). */
591 if (def_insn->bb () != use_insn->bb ())
592 {
593 src_uses = crtl->ssa->make_uses_available (watermark&: attempt, uses: src_uses,
594 bb: use_insn->bb (),
595 will_be_debug_uses: use_insn->is_debug_insn ());
596 if (!src_uses.is_valid ())
597 return false;
598 }
599
600 insn_change use_change (use_insn);
601 use_change.new_uses = merge_access_arrays (watermark&: attempt, accesses1: use_change.new_uses,
602 accesses2: src_uses);
603 if (!use_change.new_uses.is_valid ())
604 return false;
605
606 /* ??? We could allow movement within the EBB by adding:
607
608 use_change.move_range = use_insn->ebb ()->insn_range (); */
609 if (!restrict_movement (change&: use_change))
610 return false;
611
612 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
613 || try_fwprop_subst_notes (use_insn, def, dest, src));
614}
615
616/* For the given single_set INSN, containing SRC known to be a
617 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
618 is redundant due to the register being set by a LOAD_EXTEND_OP
619 load from memory. */
620
621static bool
622free_load_extend (rtx src, insn_info *insn)
623{
624 rtx reg = XEXP (src, 0);
625 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
626 return false;
627
628 def_info *def = nullptr;
629 for (use_info *use : insn->uses ())
630 if (use->regno () == REGNO (reg))
631 {
632 def = use->def ();
633 break;
634 }
635
636 if (!def)
637 return false;
638
639 insn_info *def_insn = def->insn ();
640 if (def_insn->is_artificial ())
641 return false;
642
643 rtx_insn *def_rtl = def_insn->rtl ();
644 if (NONJUMP_INSN_P (def_rtl))
645 {
646 rtx patt = PATTERN (insn: def_rtl);
647
648 if (GET_CODE (patt) == SET
649 && GET_CODE (SET_SRC (patt)) == MEM
650 && rtx_equal_p (SET_DEST (patt), reg))
651 return true;
652 }
653 return false;
654}
655
656/* Subroutine of forward_propagate_subreg that handles a use of DEST
657 in REF. The other parameters are the same. */
658
659static bool
660forward_propagate_subreg (use_info *use, set_info *def,
661 rtx dest, rtx src, df_ref ref)
662{
663 scalar_int_mode int_use_mode, src_mode;
664
665 /* Only consider subregs... */
666 rtx use_reg = DF_REF_REG (ref);
667 machine_mode use_mode = GET_MODE (use_reg);
668 if (GET_CODE (use_reg) != SUBREG
669 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
670 return false;
671
672 /* ??? Replacing throughout the pattern would help for match_dups. */
673 rtx *loc = DF_REF_LOC (ref);
674 if (paradoxical_subreg_p (x: use_reg))
675 {
676 /* If this is a paradoxical SUBREG, we have no idea what value the
677 extra bits would have. However, if the operand is equivalent to
678 a SUBREG whose operand is the same as our mode, and all the modes
679 are within a word, we can just use the inner operand because
680 these SUBREGs just say how to treat the register. */
681 if (GET_CODE (src) == SUBREG
682 && REG_P (SUBREG_REG (src))
683 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
684 && GET_MODE (SUBREG_REG (src)) == use_mode
685 && subreg_lowpart_p (src))
686 return try_fwprop_subst (use, def, loc, dest: use_reg, SUBREG_REG (src));
687 }
688
689 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
690 is the low part of the reg being extended then just use the inner
691 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
692 be removed due to it matching a LOAD_EXTEND_OP load from memory,
693 or due to the operation being a no-op when applied to registers.
694 For example, if we have:
695
696 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
697 B: (... (subreg:SI (reg:DI X)) ...)
698
699 and mode_rep_extended says that Y is already sign-extended,
700 the backend will typically allow A to be combined with the
701 definition of Y or, failing that, allow A to be deleted after
702 reload through register tying. Introducing more uses of Y
703 prevents both optimisations. */
704 else if (is_a <scalar_int_mode> (m: use_mode, result: &int_use_mode)
705 && subreg_lowpart_p (use_reg))
706 {
707 if ((GET_CODE (src) == ZERO_EXTEND
708 || GET_CODE (src) == SIGN_EXTEND)
709 && is_a <scalar_int_mode> (GET_MODE (src), result: &src_mode)
710 && REG_P (XEXP (src, 0))
711 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
712 && GET_MODE (XEXP (src, 0)) == use_mode
713 && !free_load_extend (src, insn: def->insn ())
714 && (targetm.mode_rep_extended (int_use_mode, src_mode)
715 != (int) GET_CODE (src)))
716 return try_fwprop_subst (use, def, loc, dest: use_reg, XEXP (src, 0));
717 }
718
719 return false;
720}
721
722/* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
723 the result, handling cases where DEST is used in a subreg and where
724 applying that subreg to SRC results in a useful simplification. */
725
726static bool
727forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
728{
729 if (!use->includes_subregs () || !REG_P (dest))
730 return false;
731
732 if (GET_CODE (src) != SUBREG
733 && GET_CODE (src) != ZERO_EXTEND
734 && GET_CODE (src) != SIGN_EXTEND)
735 return false;
736
737 rtx_insn *use_rtl = use->insn ()->rtl ();
738 df_ref ref;
739
740 FOR_EACH_INSN_USE (ref, use_rtl)
741 if (DF_REF_REGNO (ref) == use->regno ()
742 && forward_propagate_subreg (use, def, dest, src, ref))
743 return true;
744
745 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
746 if (DF_REF_REGNO (ref) == use->regno ()
747 && forward_propagate_subreg (use, def, dest, src, ref))
748 return true;
749
750 return false;
751}
752
753/* Try to substitute (set DEST SRC), which defines DEF, into USE and
754 simplify the result. */
755
756static bool
757forward_propagate_and_simplify (use_info *use, set_info *def,
758 rtx dest, rtx src)
759{
760 insn_info *use_insn = use->insn ();
761 rtx_insn *use_rtl = use_insn->rtl ();
762 insn_info *def_insn = def->insn ();
763
764 /* ??? This check seems unnecessary. We should be able to propagate
765 into any kind of instruction, regardless of whether it's a single set.
766 It seems odd to be more permissive with asms than normal instructions. */
767 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
768 rtx use_set = single_set (insn: use_rtl);
769 if (need_single_set && !use_set)
770 return false;
771
772 /* Do not propagate into PC etc.
773
774 ??? This too seems unnecessary. The current code should work correctly
775 without it, including cases where jumps become unconditional. */
776 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
777 return false;
778
779 /* In __asm don't replace if src might need more registers than
780 reg, as that could increase register pressure on the __asm. */
781 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
782 return false;
783
784 /* Check if the def is loading something from the constant pool; in this
785 case we would undo optimization such as compress_float_constant.
786 Still, we can set a REG_EQUAL note. */
787 if (MEM_P (src) && MEM_READONLY_P (src))
788 {
789 rtx x = avoid_constant_pool_reference (src);
790 rtx note_set;
791 if (x != src
792 && (note_set = set_for_reg_notes (use_rtl))
793 && REG_P (SET_DEST (note_set))
794 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
795 {
796 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
797 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
798 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
799 if (old_rtx != new_rtx)
800 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
801 }
802 return false;
803 }
804
805 /* ??? Unconditionally propagating into PATTERN would work better
806 for instructions that have match_dups. */
807 rtx *loc = need_single_set ? &use_set : &PATTERN (insn: use_rtl);
808 return try_fwprop_subst (use, def, loc, dest, src);
809}
810
811/* Given a use USE of an insn, if it has a single reaching
812 definition, try to forward propagate it into that insn.
813 Return true if something changed.
814
815 REG_PROP_ONLY is true if we should only propagate register copies. */
816
817static bool
818forward_propagate_into (use_info *use, bool reg_prop_only = false)
819{
820 if (use->includes_read_writes ())
821 return false;
822
823 /* Disregard uninitialized uses. */
824 set_info *def = use->def ();
825 if (!def)
826 return false;
827
828 /* Only consider single-register definitions. This could be relaxed,
829 but it should rarely be needed before RA. */
830 def = look_through_degenerate_phi (access: def);
831 if (def->includes_multiregs ())
832 return false;
833
834 /* Only consider uses whose definition comes from a real instruction. */
835 insn_info *def_insn = def->insn ();
836 if (def_insn->is_artificial ())
837 return false;
838
839 rtx_insn *def_rtl = def_insn->rtl ();
840 if (!NONJUMP_INSN_P (def_rtl))
841 return false;
842 /* ??? This seems an unnecessary restriction. We can easily tell
843 which set the definition comes from. */
844 if (multiple_sets (def_rtl))
845 return false;
846 rtx def_set = simple_regno_set (PATTERN (insn: def_rtl), def->regno ());
847 if (!def_set)
848 return false;
849
850 rtx dest = SET_DEST (def_set);
851 rtx src = SET_SRC (def_set);
852 if (volatile_refs_p (src))
853 return false;
854
855 /* Allow propagations into a loop only for reg-to-reg copies, since
856 replacing one register by another shouldn't increase the cost.
857 Propagations from inner loop to outer loop should also be ok. */
858 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
859 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
860 if ((reg_prop_only
861 || (def_loop != use_loop
862 && !flow_loop_nested_p (use_loop, def_loop)))
863 && (!reg_single_def_p (x: dest) || !reg_single_def_p (x: src)))
864 return false;
865
866 /* Don't substitute into a non-local goto, this confuses CFG. */
867 insn_info *use_insn = use->insn ();
868 rtx_insn *use_rtl = use_insn->rtl ();
869 if (JUMP_P (use_rtl)
870 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
871 return false;
872
873 if (forward_propagate_and_simplify (use, def, dest, src)
874 || forward_propagate_subreg (use, def, dest, src))
875 return true;
876
877 return false;
878}
879
880static void
881fwprop_init (void)
882{
883 num_changes = 0;
884 calculate_dominance_info (CDI_DOMINATORS);
885
886 /* We do not always want to propagate into loops, so we have to find
887 loops and be careful about them. Avoid CFG modifications so that
888 we don't have to update dominance information afterwards for
889 build_single_def_use_links. */
890 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
891
892 df_analyze ();
893 crtl->ssa = new rtl_ssa::function_info (cfun);
894}
895
896static void
897fwprop_done (void)
898{
899 loop_optimizer_finalize ();
900
901 crtl->ssa->perform_pending_updates ();
902 free_dominance_info (CDI_DOMINATORS);
903 cleanup_cfg (0);
904
905 delete crtl->ssa;
906 crtl->ssa = nullptr;
907
908 delete_trivially_dead_insns (get_insns (), max_reg_num ());
909
910 if (dump_file)
911 fprintf (stream: dump_file,
912 format: "\nNumber of successful forward propagations: %d\n\n",
913 num_changes);
914}
915
916/* Try to optimize INSN, returning true if something changes.
917 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
918 the full fwprop. */
919
920static bool
921fwprop_insn (insn_info *insn, bool fwprop_addr_p)
922{
923 for (use_info *use : insn->uses ())
924 {
925 if (use->is_mem ())
926 continue;
927 /* ??? The choices here follow those in the pre-SSA code. */
928 if (!use->includes_address_uses ())
929 {
930 if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
931 return true;
932 }
933 else
934 {
935 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
936 /* The outermost loop is not really a loop. */
937 if (loop == NULL || loop_outer (loop) == NULL)
938 {
939 if (forward_propagate_into (use, reg_prop_only: fwprop_addr_p))
940 return true;
941 }
942 else if (fwprop_addr_p)
943 {
944 if (forward_propagate_into (use, reg_prop_only: false))
945 return true;
946 }
947 }
948 }
949 return false;
950}
951
952/* Main entry point. */
953
954static bool
955gate_fwprop (void)
956{
957 return optimize > 0 && flag_forward_propagate;
958}
959
960static unsigned int
961fwprop (bool fwprop_addr_p)
962{
963 fwprop_init ();
964
965 /* Go through all the instructions (including debug instructions) looking
966 for uses that we could propagate into.
967
968 Do not forward propagate addresses into loops until after unrolling.
969 CSE did so because it was able to fix its own mess, but we are not. */
970
971 insn_info *next;
972
973 /* ??? This code uses a worklist in order to preserve the behavior
974 of the pre-SSA implementation. It would be better to instead
975 iterate on each instruction until no more propagations are
976 possible, then move on to the next. */
977 auto_vec<insn_info *> worklist;
978 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
979 {
980 next = insn->next_any_insn ();
981 if (insn->can_be_optimized () || insn->is_debug_insn ())
982 if (fwprop_insn (insn, fwprop_addr_p))
983 worklist.safe_push (obj: insn);
984 }
985 for (unsigned int i = 0; i < worklist.length (); ++i)
986 {
987 insn_info *insn = worklist[i];
988 if (fwprop_insn (insn, fwprop_addr_p))
989 worklist.safe_push (obj: insn);
990 }
991
992 fwprop_done ();
993 return 0;
994}
995
996namespace {
997
998const pass_data pass_data_rtl_fwprop =
999{
1000 .type: RTL_PASS, /* type */
1001 .name: "fwprop1", /* name */
1002 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1003 .tv_id: TV_FWPROP, /* tv_id */
1004 .properties_required: 0, /* properties_required */
1005 .properties_provided: 0, /* properties_provided */
1006 .properties_destroyed: 0, /* properties_destroyed */
1007 .todo_flags_start: 0, /* todo_flags_start */
1008 TODO_df_finish, /* todo_flags_finish */
1009};
1010
1011class pass_rtl_fwprop : public rtl_opt_pass
1012{
1013public:
1014 pass_rtl_fwprop (gcc::context *ctxt)
1015 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1016 {}
1017
1018 /* opt_pass methods: */
1019 bool gate (function *) final override { return gate_fwprop (); }
1020 unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: false); }
1021
1022}; // class pass_rtl_fwprop
1023
1024} // anon namespace
1025
1026rtl_opt_pass *
1027make_pass_rtl_fwprop (gcc::context *ctxt)
1028{
1029 return new pass_rtl_fwprop (ctxt);
1030}
1031
1032namespace {
1033
1034const pass_data pass_data_rtl_fwprop_addr =
1035{
1036 .type: RTL_PASS, /* type */
1037 .name: "fwprop2", /* name */
1038 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1039 .tv_id: TV_FWPROP, /* tv_id */
1040 .properties_required: 0, /* properties_required */
1041 .properties_provided: 0, /* properties_provided */
1042 .properties_destroyed: 0, /* properties_destroyed */
1043 .todo_flags_start: 0, /* todo_flags_start */
1044 TODO_df_finish, /* todo_flags_finish */
1045};
1046
1047class pass_rtl_fwprop_addr : public rtl_opt_pass
1048{
1049public:
1050 pass_rtl_fwprop_addr (gcc::context *ctxt)
1051 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1052 {}
1053
1054 /* opt_pass methods: */
1055 bool gate (function *) final override { return gate_fwprop (); }
1056 unsigned int execute (function *) final override { return fwprop (fwprop_addr_p: true); }
1057
1058}; // class pass_rtl_fwprop_addr
1059
1060} // anon namespace
1061
1062rtl_opt_pass *
1063make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1064{
1065 return new pass_rtl_fwprop_addr (ctxt);
1066}
1067

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of gcc/fwprop.cc