/* Detect paths through the CFG which can never be executed in a conforming
   program and isolate them.

   Copyright (C) 2013-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-cfg.h"
#include "cfganal.h"
#include "intl.h"


static bool cfg_altered;

/* Callback for walk_stmt_load_store_ops.

   Return TRUE if OP will dereference the tree stored in DATA, FALSE
   otherwise.

   This routine only makes a superficial check for a dereference.  Thus,
   it must only be used if it is safe to return a false negative.  */
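/* Note that on a match the callback also marks the reference volatile
   and as having side effects, so that later passes do not delete the
   dereference which insert_trap below deliberately preserves.  */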
static bool
check_loadstore (gimple *stmt, tree op, tree, void *data)
{
  if ((TREE_CODE (op) == MEM_REF || TREE_CODE (op) == TARGET_MEM_REF)
      && operand_equal_p (TREE_OPERAND (op, 0), (tree)data, 0))
    {
      TREE_THIS_VOLATILE (op) = 1;
      TREE_SIDE_EFFECTS (op) = 1;
      update_stmt (stmt);
      return true;
    }
  return false;
}

static vec<gimple *> *bb_split_points;

/* Insert a trap after SI and split the block after the trap.  */

static void
insert_trap (gimple_stmt_iterator *si_p, tree op)
{
  /* We want the NULL pointer dereference to actually occur so that
     code that wishes to catch the signal can do so.

     If the dereference is a load, then there's nothing to do as the
     LHS will be a throw-away SSA_NAME and the RHS is the NULL dereference.

     If the dereference is a store and we can easily transform the RHS,
     then simplify the RHS to enable more DCE.  Note that we require the
     statement to be a GIMPLE_ASSIGN which filters out calls on the RHS.  */
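  /* Illustrative sketch (hypothetical GIMPLE): a store through a NULL
     pointer such as
       *p_1 = x_2;
     becomes
       *p_1 ={v} 0;
       __builtin_trap ();
     so the faulting access still occurs, and occurs before the trap.  */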
  gimple *stmt = gsi_stmt (*si_p);
  if (walk_stmt_load_store_ops (stmt, (void *)op, NULL, check_loadstore)
      && is_gimple_assign (stmt)
      && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
    {
      /* We just need to turn the RHS into zero converted to the proper
	 type.  */
      tree type = TREE_TYPE (gimple_assign_lhs (stmt));
      gimple_assign_set_rhs_code (stmt, INTEGER_CST);
      gimple_assign_set_rhs1 (stmt, fold_convert (type, integer_zero_node));
      update_stmt (stmt);
    }

  gcall *new_stmt
    = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gimple_seq seq = NULL;
  gimple_seq_add_stmt (&seq, new_stmt);

  /* If we had a NULL pointer dereference, then we want to insert the
     __builtin_trap after the statement; for the other cases we want
     to insert before the statement.  */
  if (walk_stmt_load_store_ops (stmt, (void *)op,
				check_loadstore,
				check_loadstore))
    {
      gsi_insert_after (si_p, seq, GSI_NEW_STMT);
      if (stmt_ends_bb_p (stmt))
	{
	  if (dom_info_available_p (CDI_POST_DOMINATORS))
	    bb_split_points->safe_push (stmt);
	  else
	    split_block (gimple_bb (stmt), stmt);
	  return;
	}
    }
  else
    gsi_insert_before (si_p, seq, GSI_NEW_STMT);

  if (dom_info_available_p (CDI_POST_DOMINATORS))
    bb_split_points->safe_push (new_stmt);
  else
    split_block (gimple_bb (new_stmt), new_stmt);
  *si_p = gsi_for_stmt (stmt);
}

/* BB when reached via incoming edge E will exhibit undefined behavior
   at STMT.  Isolate and optimize the path which exhibits undefined
   behavior.

   Isolation is simple.  Duplicate BB and redirect E to BB'.

   Optimization is simple as well.  Replace STMT in BB' with an
   unconditional trap and remove all outgoing edges from BB'.

   If RET_ZERO, do not trap, only return NULL.

   DUPLICATE is a pre-existing duplicate, use it as BB' if it exists.

   Return BB' (which may be equal to DUPLICATE).  */
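/* Illustrative sketch (hypothetical CFG): given

       pred0    pred1
           \    /
            \  /
             BB      STMT in BB has undefined behavior only when
            /  \     BB is entered via edge E from pred1.
           s0  s1

   isolation yields

       pred0    pred1
         |        |
         BB      BB'    BB' is the duplicate; STMT in BB' becomes
        /  \            a __builtin_trap and BB' loses its
       s0  s1           successor edges.

   Only the erroneous path now reaches the trap.  */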

ATTRIBUTE_RETURNS_NONNULL basic_block
isolate_path (basic_block bb, basic_block duplicate,
	      edge e, gimple *stmt, tree op, bool ret_zero)
{
  gimple_stmt_iterator si, si2;
  edge_iterator ei;
  edge e2;
  bool impossible = true;
  profile_count count = e->count ();

  for (si = gsi_start_bb (bb); gsi_stmt (si) != stmt; gsi_next (&si))
    if (stmt_can_terminate_bb_p (gsi_stmt (si)))
      {
	impossible = false;
	break;
      }
  force_edge_cold (e, impossible);

  /* First duplicate BB if we have not done so already and remove all
     the duplicate's outgoing edges as duplicate is going to unconditionally
     trap.  Removing the outgoing edges is both an optimization and ensures
     we don't need to do any PHI node updates.  */
  if (!duplicate)
    {
      duplicate = duplicate_block (bb, NULL, NULL);
      duplicate->count = profile_count::zero ();
      if (!ret_zero)
	for (ei = ei_start (duplicate->succs); (e2 = ei_safe_edge (ei)); )
	  remove_edge (e2);
    }
  bb->count -= count;

  /* Complete the isolation step by redirecting E to reach DUPLICATE.  */
  e2 = redirect_edge_and_branch (e, duplicate);
  if (e2)
    {
      flush_pending_stmts (e2);

      /* Update profile only when redirection is really processed.  */
      bb->count += e->count ();
    }

  /* There may be more than one statement in DUPLICATE which exhibits
     undefined behavior.  Ultimately we want the first such statement in
     DUPLICATE so that we're able to delete as much code as possible.

     So each time we discover undefined behavior in DUPLICATE, search for
     the statement which triggers undefined behavior.  If found, then
     transform the statement into a trap and delete everything after the
     statement.  If not found, then this particular instance was subsumed by
     an earlier instance of undefined behavior and there's nothing to do.

     This is made more complicated by the fact that we have STMT, which is in
     BB rather than in DUPLICATE.  So we set up two iterators, one for each
     block and walk forward looking for STMT in BB, advancing each iterator at
     each step.

     When we find STMT the second iterator should point to STMT's equivalent in
     duplicate.  If DUPLICATE ends before STMT is found in BB, then there's
     nothing to do.

     Ignore labels and debug statements.  */
  si = gsi_start_nondebug_after_labels_bb (bb);
  si2 = gsi_start_nondebug_after_labels_bb (duplicate);
  while (!gsi_end_p (si) && !gsi_end_p (si2) && gsi_stmt (si) != stmt)
    {
      gsi_next_nondebug (&si);
      gsi_next_nondebug (&si2);
    }

  /* This would be an indicator that we never found STMT in BB, which should
     never happen.  */
  gcc_assert (!gsi_end_p (si));

  /* If we did not run to the end of DUPLICATE, then SI points to STMT and
     SI2 points to the duplicate of STMT in DUPLICATE.  Insert a trap
     before SI2 and remove SI2 and all trailing statements.  */
  if (!gsi_end_p (si2))
    {
      if (ret_zero)
	{
	  greturn *ret = as_a <greturn *> (gsi_stmt (si2));
	  tree zero = build_zero_cst (TREE_TYPE (gimple_return_retval (ret)));
	  gimple_return_set_retval (ret, zero);
	  update_stmt (ret);
	}
      else
	insert_trap (&si2, op);
    }

  return duplicate;
}

/* Return TRUE if STMT is a div/mod operation using DIVISOR as the divisor.
   FALSE otherwise.  */

static bool
is_divmod_with_given_divisor (gimple *stmt, tree divisor)
{
  /* Only assignments matter.  */
  if (!is_gimple_assign (stmt))
    return false;

  /* Check for every DIV/MOD expression.  */
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
  if (rhs_code == TRUNC_DIV_EXPR
      || rhs_code == FLOOR_DIV_EXPR
      || rhs_code == CEIL_DIV_EXPR
      || rhs_code == EXACT_DIV_EXPR
      || rhs_code == ROUND_DIV_EXPR
      || rhs_code == TRUNC_MOD_EXPR
      || rhs_code == FLOOR_MOD_EXPR
      || rhs_code == CEIL_MOD_EXPR
      || rhs_code == ROUND_MOD_EXPR)
    {
      /* Pointer equality is fine when DIVISOR is an SSA_NAME, but
	 not sufficient for constants which may have different types.  */
      if (operand_equal_p (gimple_assign_rhs2 (stmt), divisor, 0))
	return true;
    }
  return false;
}
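/* For example (illustrative GIMPLE): given "t_5 = a_4 % b_3", this
   returns true when DIVISOR is b_3; given "t_6 = a_4 / 0", it returns
   true when DIVISOR is a zero constant, which is why operand_equal_p
   is used instead of pointer comparison.  */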

/* NAME is an SSA_NAME that we have already determined has the value 0 or NULL.

   Return TRUE if USE_STMT uses NAME in a way where a 0 or NULL value results
   in undefined behavior, FALSE otherwise.

   LOC is used for issuing diagnostics.  This case represents potential
   undefined behavior exposed by path splitting and that's reflected in
   the diagnostic.  */

bool
stmt_uses_name_in_undefined_way (gimple *use_stmt, tree name, location_t loc)
{
  /* If we are working with a non pointer type, then see
     if this use is a DIV/MOD operation using NAME as the
     divisor.  */
  if (!POINTER_TYPE_P (TREE_TYPE (name)))
    {
      if (!cfun->can_throw_non_call_exceptions)
	return is_divmod_with_given_divisor (use_stmt, name);
      return false;
    }

  /* NAME is a pointer, so see if it's used in a context where it must
     be non-NULL.  */
  bool by_dereference
    = infer_nonnull_range_by_dereference (use_stmt, name);

  if (by_dereference
      || infer_nonnull_range_by_attribute (use_stmt, name))
    {
      if (by_dereference)
	{
	  warning_at (loc, OPT_Wnull_dereference,
		      "potential null pointer dereference");
	  if (!flag_isolate_erroneous_paths_dereference)
	    return false;
	}
      else
	{
	  if (!flag_isolate_erroneous_paths_attribute)
	    return false;
	}
      return true;
    }
  return false;
}

/* Return TRUE if STMT uses 0 or NULL in a context which results in
   undefined behavior, FALSE otherwise.

   These cases are explicit in the IL.  */

bool
stmt_uses_0_or_null_in_undefined_way (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions
      && is_divmod_with_given_divisor (stmt, integer_zero_node))
    return true;

  /* By passing null_pointer_node, we can use the
     infer_nonnull_range functions to detect explicit NULL
     pointer dereferences and other uses where a non-NULL
     value is required.  */

  bool by_dereference
    = infer_nonnull_range_by_dereference (stmt, null_pointer_node);
  if (by_dereference
      || infer_nonnull_range_by_attribute (stmt, null_pointer_node))
    {
      if (by_dereference)
	{
	  location_t loc = gimple_location (stmt);
	  warning_at (loc, OPT_Wnull_dereference,
		      "null pointer dereference");
	  if (!flag_isolate_erroneous_paths_dereference)
	    return false;
	}
      else
	{
	  if (!flag_isolate_erroneous_paths_attribute)
	    return false;
	}
      return true;
    }
  return false;
}
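/* For example (illustrative GIMPLE): jump threading or constant
   propagation can leave an explicit null dereference such as
     MEM[(int *)0B] = 1;
   or a division by an explicit zero such as
     t_2 = x_1 / 0;
   both of which are caught here without looking at any SSA_NAME.  */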

/* Describes the property of a return statement that may return
   the address of one or more local variables.  The type must
   be safely assignable and copyable so that it can be stored in
   a hash_map.  */
class args_loc_t
{
 public:

  args_loc_t (): nargs (), locvec (), ptr (&ptr)
  {
    locvec.create (4);
  }

  args_loc_t (const args_loc_t &rhs)
    : nargs (rhs.nargs), locvec (rhs.locvec.copy ()), ptr (&ptr) { }

  args_loc_t& operator= (const args_loc_t &rhs)
  {
    nargs = rhs.nargs;
    locvec.release ();
    locvec = rhs.locvec.copy ();
    return *this;
  }

  ~args_loc_t ()
  {
    locvec.release ();
    gcc_assert (ptr == &ptr);
  }

  /* For a PHI in a return statement its number of arguments.  When greater
     than LOCVEC.LENGTH () implies that an address of one of the locals in
     LOCVEC may but need not be returned by the statement.  Otherwise,
     unless both are zero, it implies it definitely is returned.  */
  unsigned nargs;
  /* The locations of local variables/alloca calls returned by the return
     statement.  Avoid using auto_vec here since it's not safe to copy due
     to pr90904.  */
  vec <location_t> locvec;
  void *ptr;
};
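/* The PTR member above is presumably a copy-detection guard: every
   constructor sets it to the address of the object itself, so the
   assertion in the destructor fails if an instance was ever duplicated
   bitwise (e.g., by a container moving storage) instead of through the
   copy constructor or assignment operator; compare the pr90904 note on
   LOCVEC above.  */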

/* A mapping from a return statement to the locations of local variables
   whose addresses it may return.  */
typedef hash_map <gimple *, args_loc_t> locmap_t;

/* Given the LOCMAP mapping, issue diagnostics about returning addresses
   of local variables.  When MAYBE is set, all diagnostics will be of
   the "may return" kind.  Otherwise each will be determined based on
   the equality of the corresponding NARGS and LOCVEC.LENGTH () values.  */

static void
diag_returned_locals (bool maybe, const locmap_t &locmap)
{
  for (locmap_t::iterator it = locmap.begin (); it != locmap.end (); ++it)
    {
      gimple *stmt = (*it).first;
      const args_loc_t &argsloc = (*it).second;
      location_t stmtloc = gimple_location (stmt);
      if (stmtloc == UNKNOWN_LOCATION)
	/* When multiple return statements are merged into one it
	   may not have an associated location.  Use the location
	   of the closing brace instead.  */
	stmtloc = cfun->function_end_locus;

      auto_diagnostic_group d;
      unsigned nargs = argsloc.locvec.length ();
      if (warning_at (stmtloc, OPT_Wreturn_local_addr,
		      (maybe || argsloc.nargs > nargs
		       ? G_("function may return address of local variable")
		       : G_("function returns address of local variable"))))
	{
	  for (unsigned i = 0; i != nargs; ++i)
	    inform (argsloc.locvec[i], "declared here");
	}
    }
}

/* Return true if EXPR is an expression of pointer type that refers
   to the address of one or more variables with automatic storage
   duration.  If so, add an entry to *PLOCMAP and insert into
   PLOCMAP->LOCVEC the locations of the corresponding local variables
   whose address is returned by the RETURN_STMT (which may be set to
   (gimple*)-1 as a placeholder for such a statement).  VISITED is
   a bitmap of PHI nodes already visited by recursive calls.  When
   null, PHI expressions are not considered.  */

static bool
is_addr_local (gimple *return_stmt, tree exp, locmap_t *plocmap,
	       hash_set<gphi *> *visited)
{
  if (TREE_CODE (exp) == ADDR_EXPR)
    {
      tree baseaddr = get_base_address (TREE_OPERAND (exp, 0));
      if (TREE_CODE (baseaddr) == MEM_REF)
	return is_addr_local (return_stmt, TREE_OPERAND (baseaddr, 0),
			      plocmap, visited);

      if ((!VAR_P (baseaddr)
	   || is_global_var (baseaddr))
	  && TREE_CODE (baseaddr) != PARM_DECL)
	return false;

      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
      argsloc.locvec.safe_push (DECL_SOURCE_LOCATION (baseaddr));
      return true;
    }

  if (!POINTER_TYPE_P (TREE_TYPE (exp)))
    return false;

  if (TREE_CODE (exp) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (exp);
      enum gimple_code code = gimple_code (def_stmt);

      if (is_gimple_assign (def_stmt))
	{
	  tree type = TREE_TYPE (gimple_assign_lhs (def_stmt));
	  if (POINTER_TYPE_P (type))
	    {
	      tree_code code = gimple_assign_rhs_code (def_stmt);
	      tree ptr1 = NULL_TREE, ptr2 = NULL_TREE;

	      /* Set to the number of arguments examined that should
		 be added to ARGSLOC->NARGS to identify expressions
		 only some but not all of whose operands refer to local
		 addresses.  */
	      unsigned nargs = 0;
	      if (code == COND_EXPR)
		{
		  ptr1 = gimple_assign_rhs2 (def_stmt);
		  ptr2 = gimple_assign_rhs3 (def_stmt);
		  nargs = 2;
		}
	      else if (code == MAX_EXPR || code == MIN_EXPR)
		{
		  ptr1 = gimple_assign_rhs1 (def_stmt);
		  ptr2 = gimple_assign_rhs2 (def_stmt);
		  nargs = 2;
		}
	      else if (code == ADDR_EXPR
		       || code == NOP_EXPR
		       || code == POINTER_PLUS_EXPR)
		/* Leave NARGS at zero and let the recursive call set it.  */
		ptr1 = gimple_assign_rhs1 (def_stmt);

	      /* Avoid short-circuiting the logical OR result in case
		 both operands refer to local variables, in which case
		 both should be considered and identified in the warning.  */
	      bool res1 = false, res2 = false;
	      if (ptr1)
		res1 = is_addr_local (return_stmt, ptr1, plocmap, visited);
	      if (ptr2)
		res2 = is_addr_local (return_stmt, ptr2, plocmap, visited);

	      if (nargs)
		if (args_loc_t *argsloc = plocmap->get (return_stmt))
		  argsloc->nargs += nargs;

	      return res1 || res2;
	    }
	  return false;
	}

      if (code == GIMPLE_CALL
	  && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL))
	{
	  /* Handle alloca and friends that return pointers to automatic
	     storage.  */
	  tree fn = gimple_call_fndecl (def_stmt);
	  int code = DECL_FUNCTION_CODE (fn);
	  if (code == BUILT_IN_ALLOCA
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN
	      || code == BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX)
	    {
	      args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	      argsloc.locvec.safe_push (gimple_location (def_stmt));
	      return true;
	    }

	  if (gimple_call_num_args (def_stmt) < 1)
	    return false;

	  /* Recursively examine the first argument of calls to built-ins
	     that return it.  */
	  switch (code)
	    {
	    case BUILT_IN_MEMCPY:
	    case BUILT_IN_MEMCPY_CHK:
	    case BUILT_IN_MEMPCPY:
	    case BUILT_IN_MEMPCPY_CHK:
	    case BUILT_IN_MEMMOVE:
	    case BUILT_IN_MEMMOVE_CHK:
	    case BUILT_IN_STPCPY:
	    case BUILT_IN_STPCPY_CHK:
	    case BUILT_IN_STPNCPY:
	    case BUILT_IN_STPNCPY_CHK:
	    case BUILT_IN_STRCAT:
	    case BUILT_IN_STRCAT_CHK:
	    case BUILT_IN_STRCHR:
	    case BUILT_IN_STRCPY:
	    case BUILT_IN_STRCPY_CHK:
	    case BUILT_IN_STRNCAT:
	    case BUILT_IN_STRNCAT_CHK:
	    case BUILT_IN_STRNCPY:
	    case BUILT_IN_STRNCPY_CHK:
	    case BUILT_IN_STRRCHR:
	    case BUILT_IN_STRSTR:
	      return is_addr_local (return_stmt,
				    gimple_call_arg (def_stmt, 0),
				    plocmap, visited);
	    default:
	      return false;
	    }
	}

      if (code == GIMPLE_PHI && visited)
	{
	  gphi *phi_stmt = as_a <gphi *> (def_stmt);
	  if (visited->add (phi_stmt))
	    return false;

	  unsigned count = 0;
	  unsigned nargs = gimple_phi_num_args (phi_stmt);
	  args_loc_t &argsloc = plocmap->get_or_insert (return_stmt);
	  /* Bump up the number of operands examined by the number of
	     operands of this PHI.  */
	  argsloc.nargs += nargs;
	  for (unsigned i = 0; i < gimple_phi_num_args (phi_stmt); ++i)
	    {
	      tree arg = gimple_phi_arg_def (phi_stmt, i);
	      if (is_addr_local (return_stmt, arg, plocmap, visited))
		++count;
	    }
	  return count != 0;
	}
    }

  return false;
}
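/* Illustrative sketch (hypothetical source): in

     int *
     f (int i)
     {
       int a, b;
       if (i)
	 return &a;
       return &b;
     }

   merging the returns can leave a single return fed by a PHI whose two
   arguments are both local addresses; NARGS then equals LOCVEC.length ()
   and diag_returned_locals uses the "returns address" wording rather
   than "may return".  */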

/* Detect returning the address of a local variable in a PHI result LHS
   and argument ARG and PHI edge E in basic block BB.  Add an entry for
   each use to LOCMAP, setting its NARGS member to the NARGS argument
   (the number of PHI operands) plus the number of arguments in binary
   expressions referenced by ARG.  Call isolate_path for each returned
   address and set *ISOLATED to true if called.
   Return either DUPLICATE or the most recent result of isolate_path.  */

static basic_block
handle_return_addr_local_phi_arg (basic_block bb, basic_block duplicate,
				  tree lhs, tree arg, edge e, locmap_t &locmap,
				  unsigned nargs, bool *isolated)
{
  /* Use (gimple*)-1 as a temporary placeholder and replace it with
     the return statement below once it is known.  Using a null doesn't
     work because it's used by the hash_map to mean "no-entry."  Pass
     null instead of a visited_phis bitmap to avoid descending into
     PHIs since they are being processed by the caller.  Those that
     remain will be checked again later.  */
  if (!is_addr_local ((gimple*)-1, arg, &locmap, NULL))
    {
      /* Remove the placeholder regardless of success or failure.  */
      locmap.remove ((gimple*)-1);
      return duplicate;
    }

  const args_loc_t* const placeargsloc = locmap.get ((gimple*)-1);
  const unsigned nlocs = placeargsloc->locvec.length ();
  gcc_assert (nlocs);

  /* Add to the number of PHI arguments determined by the caller
     the number of operands of the expressions referenced by ARG.
     This lets the caller determine whether it's dealing with
     a "may return" or "definitely returns."  */
  nargs += placeargsloc->nargs;

  /* Set to true if any expressions referenced by ARG involve
     multiple addresses only some of which are those of locals.  */
  bool maybe = placeargsloc->nargs > placeargsloc->locvec.length ();

  gimple *use_stmt;
  imm_use_iterator iter;

  /* Look for uses of the PHI result LHS in return statements.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      greturn *return_stmt = dyn_cast <greturn *> (use_stmt);
      if (!return_stmt)
	continue;

      if (gimple_return_retval (return_stmt) != lhs)
	continue;

      /* Add an entry for the return statement and the locations
	 of the PHI arguments obtained above to the map.  */
      args_loc_t &argsloc = locmap.get_or_insert (use_stmt);
      argsloc.nargs = nargs;
      unsigned nelts = argsloc.locvec.length () + nlocs;
      argsloc.locvec.reserve (nelts);
      argsloc.locvec.splice (placeargsloc->locvec);

      if (!maybe
	  && (flag_isolate_erroneous_paths_dereference
	      || flag_isolate_erroneous_paths_attribute)
	  && gimple_bb (use_stmt) == bb
	  && (duplicate || can_duplicate_block_p (bb)))
	{
	  duplicate = isolate_path (bb, duplicate, e,
				    use_stmt, lhs, true);

	  /* Let caller know the path has been isolated.  */
	  *isolated = true;
	}
    }

  locmap.remove ((gimple*)-1);

  return duplicate;
}

/* Look for PHI nodes which feed statements in the same block where
   the value of the PHI node implies the statement is erroneous.

   For example, a NULL PHI arg value which then feeds a pointer
   dereference.

   When found isolate and optimize the path associated with the PHI
   argument feeding the erroneous statement.  */
static void
find_implicit_erroneous_behavior (void)
{
  locmap_t locmap;

  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gphi_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* If BB has an edge to itself, then duplication of BB below
	 could result in reallocation of BB's PHI nodes.  If that happens
	 then the loop below over the PHIs would use the old PHI and
	 thus invalid information.  We don't have a good way to know
	 if a PHI has been reallocated, so just avoid isolation in
	 this case.  */
      if (find_edge (bb, bb))
	continue;

      /* First look for a PHI which sets a pointer to NULL and which
	 is then dereferenced within BB.  This is somewhat overly
	 conservative, but probably catches most of the interesting
	 cases.  */
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gphi *phi = si.phi ();
	  tree lhs = gimple_phi_result (phi);

	  /* Initial number of PHI arguments.  The result may change
	     from one iteration of the loop below to the next in
	     response to changes to the CFG but only the initial
	     value is stored below for use by diagnostics.  */
	  unsigned nargs = gimple_phi_num_args (phi);

	  /* PHI produces a pointer result.  See if any of the PHI's
	     arguments are NULL.

	     When we remove an edge, we want to reprocess the current
	     index since the argument at that index will have been
	     removed, hence the ugly way we update I for each iteration.  */
	  basic_block duplicate = NULL;
	  for (unsigned i = 0, next_i = 0;
	       i < gimple_phi_num_args (phi); i = next_i)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      edge e = gimple_phi_arg_edge (phi, i);

	      /* Advance the argument index unless a path involving
		 the current argument has been isolated.  */
	      next_i = i + 1;
	      bool isolated = false;
	      duplicate = handle_return_addr_local_phi_arg (bb, duplicate, lhs,
							    arg, e, locmap,
							    nargs, &isolated);
	      if (isolated)
		{
		  cfg_altered = true;
		  next_i = i;
		}

	      if (!integer_zerop (arg))
		continue;

	      location_t phi_arg_loc = gimple_phi_arg_location (phi, i);

	      imm_use_iterator iter;
	      gimple *use_stmt;

	      /* We've got a NULL PHI argument.  Now see if the
		 PHI's result is dereferenced within BB.  */
	      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
		{
		  /* We only care about uses in BB.  Catching cases in
		     other blocks would require more complex path
		     isolation code.  */
		  if (gimple_bb (use_stmt) != bb)
		    continue;

		  location_t loc = gimple_location (use_stmt)
		    ? gimple_location (use_stmt)
		    : phi_arg_loc;

		  if (stmt_uses_name_in_undefined_way (use_stmt, lhs, loc)
		      && (duplicate || can_duplicate_block_p (bb)))
		    {
		      duplicate = isolate_path (bb, duplicate, e,
						use_stmt, lhs, false);

		      /* When we remove an incoming edge, we need to
			 reprocess the Ith element.  */
		      next_i = i;
		      cfg_altered = true;
		    }
		}
	    }
	}
    }

  diag_returned_locals (false, locmap);
}

/* Detect and diagnose returning the address of a local variable
   in RETURN_STMT in basic block BB.  This only becomes undefined
   behavior if the result is used, so we do not insert a trap and
   only return NULL instead.  */

static void
warn_return_addr_local (basic_block bb, greturn *return_stmt)
{
  tree val = gimple_return_retval (return_stmt);
  if (!val)
    return;

  locmap_t locmap;
  hash_set<gphi *> visited_phis;
  if (!is_addr_local (return_stmt, val, &locmap, &visited_phis))
    return;

  /* We only need it for this particular case.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);

  const args_loc_t *argsloc = locmap.get (return_stmt);
  gcc_assert (argsloc);

  bool maybe = argsloc->nargs > argsloc->locvec.length ();
  if (!maybe)
    maybe = !dominated_by_p (CDI_POST_DOMINATORS,
			     single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)), bb);

  diag_returned_locals (maybe, locmap);

  /* Bail if the statement isn't certain to return the address
     of a local (e.g., if it involves a conditional expression
     that wasn't transformed into a PHI or if it involves
     a MAX_EXPR or MIN_EXPR only one of whose operands is a local
     (even though such an expression isn't valid in C or has
     defined semantics in C++)).  */
  if (maybe)
    return;

  /* Do not modify code if the user only asked for warnings.  */
  if (flag_isolate_erroneous_paths_dereference
      || flag_isolate_erroneous_paths_attribute)
    {
      tree zero = build_zero_cst (TREE_TYPE (val));
      gimple_return_set_retval (return_stmt, zero);
      update_stmt (return_stmt);
    }
}
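/* Illustrative sketch (hypothetical source):

     int *
     f (void)
     {
       int a;
       return &a;
     }

   draws "function returns address of local variable", and because MAYBE
   is false here, path isolation (when enabled) replaces the returned
   value with a null pointer.  */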

/* Look for statements which exhibit erroneous behavior.  For example
   a NULL pointer dereference.

   When found, optimize the block containing the erroneous behavior.  */
static void
find_explicit_erroneous_behavior (void)
{
  basic_block bb;
  auto_vec<gimple *> local_bb_split_points;
  bb_split_points = &local_bb_split_points;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator si;

      /* Out of an abundance of caution, do not isolate paths to a
	 block where the block has any abnormal outgoing edges.

	 We might be able to relax this in the future.  We have to detect
	 when we have to split the block with the NULL dereference and
	 the trap we insert.  We have to preserve abnormal edges out
	 of the isolated block which in turn means updating PHIs at
	 the targets of those abnormal outgoing edges.  */
      if (has_abnormal_or_eh_outgoing_edge_p (bb))
	continue;

      /* Now look at the statements in the block and see if any of
	 them explicitly dereference a NULL pointer.  This happens
	 because of jump threading and constant propagation.  */
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);

	  if (stmt_uses_0_or_null_in_undefined_way (stmt))
	    {
	      insert_trap (&si, null_pointer_node);
	      bb = gimple_bb (gsi_stmt (si));

	      /* Ignore any more operands on this statement and
		 continue the statement iterator (which should
		 terminate its loop immediately).  */
	      cfg_altered = true;
	      break;
	    }

	  /* Look for a return statement that returns the address
	     of a local variable or the result of alloca.  */
	  if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	    warn_return_addr_local (bb, return_stmt);
	}
    }

  free_dominance_info (CDI_POST_DOMINATORS);

  /* Perform delayed splitting of blocks.  */
  for (gimple *stmt : local_bb_split_points)
    split_block (gimple_bb (stmt), stmt);

  bb_split_points = NULL;
}

/* Search the function for statements which, if executed, would cause
   the program to fault such as a dereference of a NULL pointer.

   Such a program can't be valid if such a statement were to execute
   according to ISO standards.

   We detect explicit NULL pointer dereferences as well as those implied
   by a PHI argument having a NULL value which unconditionally flows into
   a dereference in the same block as the PHI.

   In the former case we replace the offending statement with an
   unconditional trap and eliminate the outgoing edges from the statement's
   basic block.  This may expose secondary optimization opportunities.

   In the latter case, we isolate the path(s) with the NULL PHI
   feeding the dereference.  We can then replace the offending statement
   and eliminate the outgoing edges in the duplicate.  Again, this may
   expose secondary optimization opportunities.

   A warning for both cases may be advisable as well.

   Other statically detectable violations of the ISO standard could be
   handled in a similar way, such as out-of-bounds array indexing.  */

static unsigned int
gimple_ssa_isolate_erroneous_paths (void)
{
  initialize_original_copy_tables ();

  /* Search all the blocks for edges which, if traversed, will
     result in undefined behavior.  */
  cfg_altered = false;

  /* First handle cases where traversal of a particular edge
     triggers undefined behavior.  These cases require creating
     duplicate blocks and thus new SSA_NAMEs.

     We want that process complete prior to the phase where we start
     removing edges from the CFG.  Edge removal may ultimately result in
     removal of PHI nodes and thus releasing SSA_NAMEs back to the
     name manager.

     If the two processes ran in parallel we could release an SSA_NAME
     back to the manager but still have dangling references to it in
     unreachable blocks.  Running them in sequence ensures that any
     released names do not have dangling references in the IL.  */
  find_implicit_erroneous_behavior ();
  find_explicit_erroneous_behavior ();

  free_original_copy_tables ();

  /* We scramble the CFG and loop structures a bit, clean up
     appropriately.  We really should incrementally update the
     loop structures, in theory it shouldn't be that hard.  */
  if (cfg_altered)
    {
      free_dominance_info (CDI_DOMINATORS);
      loops_state_set (LOOPS_NEED_FIXUP);
      return TODO_cleanup_cfg | TODO_update_ssa;
    }
  return 0;
}

namespace {
const pass_data pass_data_isolate_erroneous_paths =
{
  GIMPLE_PASS, /* type */
  "isolate-paths", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_ISOLATE_ERRONEOUS_PATHS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_isolate_erroneous_paths : public gimple_opt_pass
{
public:
  pass_isolate_erroneous_paths (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_isolate_erroneous_paths, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override
  {
    return new pass_isolate_erroneous_paths (m_ctxt);
  }
  bool gate (function *) final override
  {
    /* If we do not have a suitable builtin function for the trap statement,
       then do not perform the optimization.  */
    return (flag_isolate_erroneous_paths_dereference != 0
	    || flag_isolate_erroneous_paths_attribute != 0
	    || warn_null_dereference);
  }

  unsigned int execute (function *) final override
  {
    return gimple_ssa_isolate_erroneous_paths ();
  }

}; // class pass_isolate_erroneous_paths
}

gimple_opt_pass *
make_pass_isolate_erroneous_paths (gcc::context *ctxt)
{
  return new pass_isolate_erroneous_paths (ctxt);
}