This backport contains 50 patches from the GCC mainstream tree. The commit IDs of these patches are listed below in chronological order.

ipa-const-prop-2019-06-10-add-ignore-edge-func.patch: commit 97e59627567757759b047479c75be2f238ea45c3
ipa-const-prop-2019-06-14-prop-by-ref-to-callee.patch: commit 46771da57463c62f66af32e9189f1b6fb8bbe8c7
ipa-const-prop-2019-07-05-add-tbaa-para.patch,
ipa-const-prop-2019-07-05-add-tbaa-para-conflict-fix.patch: commit fb4697e30bd0cd4bda66932e21c183273a5d1e63
ipa-const-prop-2019-07-08-bugfix-drop-useless-instr.patch,
ipa-const-prop-2019-07-08-bugfix-drop-useless-instr-conflict-fix.patch: commit 38988cbf9ebaa96fb1e891a46aa063f0c298a2e2
ipa-const-prop-2019-07-09-ipa-cp-class-change.patch,
ipa-const-prop-2019-07-09-ipa-fnsummary-class-change.patch,
ipa-const-prop-2019-07-09-ipa-inline-analysis-class-change.patch,
ipa-const-prop-2019-07-09-ipa-prop-class-change.patch,
ipa-const-prop-2019-07-09-ipa-prop-class-change-conflic-fix.patch,
ipa-const-prop-2019-07-09-ipa-predicate-class-change.patch: commit 99b1c316ec974a39bdd949f8559bb28861b69592
ipa-const-prop-2019-08-07-change-to-poly_64.patch: commit 8600364582f24d2a3f227111c6a87b7d98561c69
ipa-const-prop-2019-08-12-bugfix-add-condition-fix.patch: commit 52c9b7face987062527c612e0a65f084e43c85fd
ipa-const-prop-2019-09-17-new-para-ipa-max-switch.patch: commit 351e7c3b5fbd45bde3efb601f7fee9a31c4f2063
ipa-const-prop-2019-09-19-auto-switch-predicate.patch: commit efe126563bb8d28cb3958423a735d0021e75702f
ipa-const-prop-2019-10-03-generate-ipa-on-para-ref.patch: commit 4307a485c39fd1c317d6cead2707a903052c4753
ipa-const-prop-2019-10-05-inline-size-para-change.patch: commit 6c291ad828fcb5f01a1d2cb23f6078e9a6f958b9
ipa-const-prop-2019-10-10-bugfix-20040708-split-splay-tree.patch: commit 6488759f404f3aff6642b005242a9c82a1c2cee2
ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func.patch: commit b5b6485f1cc54f21713b5b03c5d63d56839ca458
ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func-2nd.patch: commit 45012be1f5c7e6039e594bab41ebb94d89a9aca0
ipa-const-prop-2019-10-24-toggle-static-write.patch: commit abebffc609506176f8ba3f64533e15ece49446c0
ipa-const-prop-2019-10-25-bugfix-empty-edge-ICE.patch: commit 5a0236f8ca9d239bb62ef54c9273e6ca3f068f87
ipa-const-prop-2019-10-25-call-size-summary.patch,
ipa-const-prop-2019-10-25-call-size-summary-confict-fix.patch: commit f658ad3002a0afc8aa86d5646ee704921d969ebe
ipa-const-prop-2019-10-27-bugfix-solve-LTO-ICE.patch: commit b1e655646f5b0be3d146825c130690078a8601c3
ipa-const-prop-2019-10-27-do-not-move-jump.patch: commit 051d8a5faa3b37b0dda84c8382174ee70d5b7992
ipa-const-prop-2019-10-27-drop-if-no-arg.patch: commit a33c028eb38268b5084ebc4cc17a1cb64b3a838b
ipa-const-prop-2019-10-27-update-sum-after-expand.patch: commit a088d7b10f296dbd57bccbac1bfcf8abb207b034
ipa-const-prop-2019-10-30-remove-global.patch: commit a62bfab5d2a332925fcf10c45b4c5d8ca499439d
ipa-const-prop-2019-11-03-add-deplicate-form.patch: commit ac6f2e594886e2209446114023ecdff96b0bd7c4
ipa-const-prop-2019-11-03-ipa-inline-analysis-conflict-fix.patch,
ipa-const-prop-2019-11-03-improve-efficiency-of-ipa-poly.patch: commit 40a777e840f74dd5c19ea26c55d1248a335fd11b
ipa-const-prop-2019-11-03-ipa-fnsummary-add-call-context.patch: commit 1532500ecbe8dbf59bef498e46b447b3a6b0fa65
ipa-const-prop-2019-11-03-size-ahead-time.patch: commit 360386c7ef1c3fa30de216b1d68ed6a27296fd80
ipa-const-prop-2019-11-04-ipa-inline-includes-ipa-utils.patch: commit 2bc2379be5c98d34ecbb347b2abf059aa6d94499
ipa-const-prop-2019-11-09-add-ipacp-clone.patch: commit 6cf67b62c8cda035dccaca2ae6ff94d560b37a6f
ipa-const-prop-2019-11-09-call-nodeRef-on-func-sym.patch: commit 2ee6e04aaecc856bced29711f9765660e0888994
ipa-const-prop-2019-11-13-bugfix-inline-check-before-flatten.patch: commit 2895b172d56c355373b64517a3298a01a2f10ec0
ipa-const-prop-2019-11-13-bugfix-inline-empty-edge.patch: commit 367c959f0303e11e0a6d875abba7d03c72686668
ipa-const-prop-2019-11-13-bugfix-inline-small-function.patch: commit b914768c1968d924d77bbe3f4e707c6105f3682c
ipa-const-prop-2019-11-13-bugfix-lto-ICE.patch: commit d200a49f5c83fa0f2e7332aecf69b6ab4a51b052
ipa-const-prop-2019-11-13-fix-ipa-profile-indirect-call.patch: commit 7b34a284cab5d533552c1df995a88f7167d243bd
ipa-const-prop-2019-11-14-by-ref-const-prop.patch,
ipa-const-prop-2019-11-14-by-ref-const-prop-conflict-fix.patch: commit eb270950acbae6f70e3487a6e63a26c1294656b3
ipa-const-prop-2019-11-15-bugfix-segfault-with-null-top.patch: commit 1c3c3f455021130c429f57b09ef39bc218bd7fff
ipa-const-prop-2019-11-18-bugfix-ICE-null-edge.patch: commit 8d890d37e0183735586c18f1f056deb5848617ca
ipa-const-prop-2019-11-18-bug-fix-ICE.patch: commit 8d890d37e0183735586c18f1f056deb5848617ca
ipa-const-prop-2019-12-02-recusion-versioning.patch,
ipa-const-prop-2019-12-02-param-conflict-fix.patch: commit 9b14fc3326e087975653b1af8ac54114041cde51

The originals of these commits can be found at https://github.com/gcc-mirror/gcc. Not all of the commits are applied directly: when a commit also touches modules that are unrelated to the IPA constant propagation optimization, the part that the optimization needs is regrouped into a small new patch, usually named with a conflict-fix suffix.
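As an illustration of the main new capability in this series (the recursive
versioning enabled by ipa-const-prop-2019-12-02-recusion-versioning.patch),
consider the following minimal sketch. The function and file names here are
hypothetical and are not part of the patch set; they only show the shape of
code that may benefit. With the patches applied, IPA-CP can specialize a
self-recursive call whose argument is computed by an arithmetic jump
function (here "depth - 1"), creating up to ipa-cp-max-recursive-depth
constprop clones instead of treating the lattice as variable:

/* test-recursion.c: hypothetical example, not part of the patch set.  */
static int __attribute__ ((noinline))
rec (int depth, int *sum)
{
  if (depth == 0)
    return *sum;
  *sum += depth;
  /* "depth - 1" is an arithmetic pass-through jump function on a
     self-recursive edge; the backported propagation can generate the
     constant values 5, 4, 3, ... for it instead of giving up.  */
  return rec (depth - 1, sum);
}

int
entry (void)
{
  int sum = 0;
  return rec (5, &sum);  /* IPA-CP may emit a chain of rec.constprop
			    clones, one per propagated depth value.  */
}
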
diff -Nurp a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c
--- a/gcc/cgraphbuild.c	2020-04-30 15:14:04.580000000 +0800
+++ b/gcc/cgraphbuild.c	2020-04-30 15:14:56.584000000 +0800
@@ -428,7 +428,7 @@ cgraph_edge::rebuild_edges (void)
 	node->record_stmt_references (gsi_stmt (gsi));
     }
   record_eh_tables (node, cfun);
-  gcc_assert (!node->global.inlined_to);
+  gcc_assert (!node->inlined_to);
   return 0;
 }
 
diff -Nurp a/gcc/cgraph.c b/gcc/cgraph.c
--- a/gcc/cgraph.c	2020-04-30 15:14:04.576000000 +0800
+++ b/gcc/cgraph.c	2020-04-30 15:14:56.584000000 +0800
@@ -539,7 +539,7 @@ cgraph_node::get_create (tree decl)
 {
   cgraph_node *first_clone = cgraph_node::get (decl);
 
-  if (first_clone && !first_clone->global.inlined_to)
+  if (first_clone && !first_clone->inlined_to)
     return first_clone;
 
   cgraph_node *node = cgraph_node::create (decl);
@@ -659,7 +659,7 @@ cgraph_node::get_for_asmname (tree asmna
        node = node->next_sharing_asm_name)
     {
       cgraph_node *cn = dyn_cast <cgraph_node *> (node);
-      if (cn && !cn->global.inlined_to)
+      if (cn && !cn->inlined_to)
	return cn;
     }
   return NULL;
@@ -1857,7 +1857,7 @@ cgraph_node::remove (void)
     {
       cgraph_node *n = cgraph_node::get (decl);
       if (!n
-	  || (!n->clones && !n->clone_of && !n->global.inlined_to
+	  || (!n->clones && !n->clone_of && !n->inlined_to
	      && ((symtab->global_info_ready || in_lto_p)
		  && (TREE_ASM_WRITTEN (n->decl)
		      || DECL_EXTERNAL (n->decl)
@@ -1888,7 +1888,7 @@ cgraph_node::mark_address_taken (void)
 {
   /* Indirect inlining can figure out that all uses of the address are
      inlined.  */
-  if (global.inlined_to)
+  if (inlined_to)
     {
       gcc_assert (cfun->after_inlining);
       gcc_assert (callers->indirect_inlining_edge);
@@ -2012,10 +2012,10 @@ cgraph_node::dump (FILE *f)
 
   dump_base (f);
-  if (global.inlined_to)
+  if (inlined_to)
     fprintf (f, "  Function %s is inline copy in %s\n",
	     dump_name (),
-	     global.inlined_to->dump_name ());
+	     inlined_to->dump_name ());
   if (clone_of)
     fprintf (f, "  Clone of %s\n", clone_of->dump_asm_name ());
   if (symtab->function_flags_ready)
@@ -2159,7 +2159,7 @@ cgraph_node::dump (FILE *f)
	  if (dyn_cast <cgraph_node *> (ref->referring)->count.initialized_p ())
	    sum += dyn_cast <cgraph_node *> (ref->referring)->count.ipa ();
-      if (global.inlined_to
+      if (inlined_to
	  || (symtab->state < EXPANSION
	      && ultimate_alias_target () == this && only_called_directly_p ()))
	ok = !count.ipa ().differs_from_p (sum);
@@ -2259,14 +2259,14 @@ cgraph_node::get_availability (symtab_no
     {
       cgraph_node *cref = dyn_cast <cgraph_node *> (ref);
       if (cref)
-	ref = cref->global.inlined_to;
+	ref = cref->inlined_to;
     }
   enum availability avail;
   if (!analyzed)
     avail = AVAIL_NOT_AVAILABLE;
   else if (local.local)
     avail = AVAIL_LOCAL;
-  else if (global.inlined_to)
+  else if (inlined_to)
     avail = AVAIL_AVAILABLE;
   else if (transparent_alias)
     ultimate_alias_target (&avail, ref);
@@ -2878,7 +2878,7 @@ bool
 cgraph_node::will_be_removed_from_program_if_no_direct_calls_p
	 (bool will_inline)
 {
-  gcc_assert (!global.inlined_to);
+  gcc_assert (!inlined_to);
 
   if (DECL_EXTERNAL (decl))
     return true;
@@ -3065,7 +3065,7 @@ cgraph_edge::verify_corresponds_to_fndec
 {
   cgraph_node *node;
 
-  if (!decl || callee->global.inlined_to)
+  if (!decl || callee->inlined_to)
     return false;
   if (symtab->state == LTO_STREAMING)
     return false;
@@ -3126,7 +3126,7 @@ cgraph_node::verify_node (void)
      error ("cgraph count invalid");
      error_found = true;
    }
-  if (global.inlined_to && same_comdat_group)
+  if (inlined_to && same_comdat_group)
    {
      error ("inline clone in same comdat group list");
      error_found = true;
@@ -3136,17 +3136,17 @@ cgraph_node::verify_node (void)
      error ("local symbols must be defined");
      error_found = true;
    }
-  if (global.inlined_to && externally_visible)
+  if (inlined_to && externally_visible)
    {
      error ("externally visible inline clone");
      error_found = true;
    }
-  if (global.inlined_to && address_taken)
+  if (inlined_to && address_taken)
    {
      error ("inline clone with address taken");
      error_found = true;
    }
-  if (global.inlined_to && force_output)
+  if (inlined_to && force_output)
    {
      error ("inline clone is forced to output");
      error_found = true;
@@ -3183,9 +3183,9 @@ cgraph_node::verify_node (void)
	}
      if (!e->inline_failed)
	{
-	  if (global.inlined_to
-	      != (e->caller->global.inlined_to
-		  ? e->caller->global.inlined_to : e->caller))
+	  if (inlined_to
+	      != (e->caller->inlined_to
+		  ? e->caller->inlined_to : e->caller))
	    {
	      error ("inlined_to pointer is wrong");
	      error_found = true;
@@ -3197,7 +3197,7 @@ cgraph_node::verify_node (void)
	    }
	}
      else
-	if (global.inlined_to)
+	if (inlined_to)
	  {
	    error ("inlined_to pointer set for noninline callers");
	    error_found = true;
@@ -3208,7 +3208,7 @@ cgraph_node::verify_node (void)
	  if (e->verify_count ())
	    error_found = true;
	  if (gimple_has_body_p (e->caller->decl)
-	      && !e->caller->global.inlined_to
+	      && !e->caller->inlined_to
	      && !e->speculative
	      /* Optimized out calls are redirected to __builtin_unreachable.  */
	      && (e->count.nonzero_p ()
@@ -3233,7 +3233,7 @@ cgraph_node::verify_node (void)
	  if (e->verify_count ())
	    error_found = true;
	  if (gimple_has_body_p (e->caller->decl)
-	      && !e->caller->global.inlined_to
+	      && !e->caller->inlined_to
	      && !e->speculative
	      && e->count.ipa_p ()
	      && count
@@ -3250,12 +3250,12 @@ cgraph_node::verify_node (void)
	      error_found = true;
	    }
	}
-  if (!callers && global.inlined_to)
+  if (!callers && inlined_to)
    {
      error ("inlined_to pointer is set but no predecessors found");
      error_found = true;
    }
-  if (global.inlined_to == this)
+  if (inlined_to == this)
    {
      error ("inlined_to pointer refers to itself");
      error_found = true;
@@ -3344,7 +3344,7 @@ cgraph_node::verify_node (void)
	  error ("More than one edge out of thunk node");
	  error_found = true;
	}
-      if (gimple_has_body_p (decl) && !global.inlined_to)
+      if (gimple_has_body_p (decl) && !inlined_to)
	{
	  error ("Thunk is not supposed to have body");
	  error_found = true;
@@ -3352,7 +3352,7 @@ cgraph_node::verify_node (void)
     }
   else if (analyzed && gimple_has_body_p (decl)
	   && !TREE_ASM_WRITTEN (decl)
-	   && (!DECL_EXTERNAL (decl) || global.inlined_to)
+	   && (!DECL_EXTERNAL (decl) || inlined_to)
	   && !flag_wpa)
     {
       if (this_cfun->cfg)
@@ -3623,7 +3623,7 @@ cgraph_node::get_body (void)
      early.
      TODO: Materializing clones here will likely lead to smaller LTRANS
      footprint. */
-  gcc_assert (!global.inlined_to && !clone_of);
+  gcc_assert (!inlined_to && !clone_of);
   if (ipa_transforms_to_apply.exists ())
     {
       opt_pass *saved_current_pass = current_pass;
@@ -3813,8 +3813,8 @@ cgraph_node::has_thunk_p (cgraph_node *n
 sreal
 cgraph_edge::sreal_frequency ()
 {
-  return count.to_sreal_scale (caller->global.inlined_to
-			       ? caller->global.inlined_to->count
+  return count.to_sreal_scale (caller->inlined_to
+			       ? caller->inlined_to->count
			       : caller->count);
 }
 
diff -Nurp a/gcc/cgraphclones.c b/gcc/cgraphclones.c
--- a/gcc/cgraphclones.c	2020-04-30 15:14:04.644000000 +0800
+++ b/gcc/cgraphclones.c	2020-04-30 15:14:56.628000000 +0800
@@ -458,8 +458,7 @@ cgraph_node::create_clone (tree new_decl
   new_node->externally_visible = false;
   new_node->no_reorder = no_reorder;
   new_node->local.local = true;
-  new_node->global = global;
-  new_node->global.inlined_to = new_inlined_to;
+  new_node->inlined_to = new_inlined_to;
   new_node->rtl = rtl;
   new_node->frequency = frequency;
   new_node->tp_first_run = tp_first_run;
@@ -671,6 +670,7 @@ cgraph_node::create_virtual_clone (vec<c
   else
     new_node->clone.combined_args_to_skip = args_to_skip;
   new_node->clone.args_to_skip = args_to_skip;
+  new_node->ipcp_clone = ipcp_clone;
   new_node->clone.tree_map = tree_map;
   if (!implicit_section)
     new_node->set_section (get_section ());
@@ -965,7 +965,7 @@ cgraph_node::create_version_clone (tree
   new_version->externally_visible = false;
   new_version->no_reorder = no_reorder;
   new_version->local.local = new_version->definition;
-  new_version->global = global;
+  new_version->inlined_to = inlined_to;
   new_version->rtl = rtl;
   new_version->count = count;
 
diff -Nurp a/gcc/cgraph.h b/gcc/cgraph.h
--- a/gcc/cgraph.h	2020-04-30 15:14:04.624000000 +0800
+++ b/gcc/cgraph.h	2020-04-30 15:14:56.628000000 +0800
@@ -718,15 +718,6 @@ struct GTY(()) cgraph_local_info {
   unsigned tm_may_enter_irr : 1;
 };
 
-/* Information about the function that needs to be computed globally
-   once compilation is finished.  Available only with -funit-at-a-time.  */
-
-struct GTY(()) cgraph_global_info {
-  /* For inline clones this points to the function they will be
-     inlined into.  */
-  cgraph_node *inlined_to;
-};
-
 /* Represent which DECL tree (or reference to such tree) will
    be replaced by another tree while versioning.  */
 struct GTY(()) ipa_replace_map
@@ -959,7 +950,7 @@ public:
      If the new node is being inlined into another one, NEW_INLINED_TO should be
      the outline function the new one is (even indirectly) inlined to.
-     All hooks will see this in node's global.inlined_to, when invoked.
+     All hooks will see this in node's inlined_to, when invoked.
      Can be NULL if the node is not inlined.  SUFFIX is string that is appended
      to the original name.  */
   cgraph_node *create_clone (tree decl, profile_count count,
@@ -1420,7 +1411,11 @@ public:
   vec<ipa_opt_pass> GTY((skip)) ipa_transforms_to_apply;
 
   cgraph_local_info local;
-  cgraph_global_info global;
+
+  /* For inline clones this points to the function they will be
+     inlined into.  */
+  cgraph_node *inlined_to;
+
   struct cgraph_rtl_info *rtl;
   cgraph_clone_info clone;
   cgraph_thunk_info thunk;
@@ -1474,6 +1469,8 @@ public:
   unsigned split_part : 1;
   /* True if the function appears as possible target of indirect call.  */
   unsigned indirect_call_target : 1;
+  /* True if this was a clone created by ipa-cp.  */
+  unsigned ipcp_clone : 1;
 
 private:
   /* Unique id of the node.  */
@@ -2474,7 +2471,7 @@ symtab_node::real_symbol_p (void)
   if (!is_a <cgraph_node *> (this))
     return true;
   cnode = dyn_cast <cgraph_node *> (this);
-  if (cnode->global.inlined_to)
+  if (cnode->inlined_to)
     return false;
   return true;
 }
@@ -2497,13 +2494,13 @@ symtab_node::in_same_comdat_group_p (sym
 
   if (cgraph_node *cn = dyn_cast <cgraph_node *> (target))
     {
-      if (cn->global.inlined_to)
-	source = cn->global.inlined_to;
+      if (cn->inlined_to)
+	source = cn->inlined_to;
     }
   if (cgraph_node *cn = dyn_cast <cgraph_node *> (target))
     {
-      if (cn->global.inlined_to)
-	target = cn->global.inlined_to;
+      if (cn->inlined_to)
+	target = cn->inlined_to;
     }
 
   return source->get_comdat_group () == target->get_comdat_group ();
@@ -2964,7 +2961,7 @@ struct GTY((for_user)) constant_descript
 inline bool
 cgraph_node::only_called_directly_or_aliased_p (void)
 {
-  gcc_assert (!global.inlined_to);
+  gcc_assert (!inlined_to);
   return (!force_output && !address_taken
	  && !ifunc_resolver
	  && !used_from_other_partition
@@ -2981,7 +2978,7 @@ cgraph_node::only_called_directly_or_ali
 inline bool
 cgraph_node::can_remove_if_no_direct_calls_and_refs_p (void)
 {
-  gcc_checking_assert (!global.inlined_to);
+  gcc_checking_assert (!inlined_to);
   /* Extern inlines can always go, we will use the external definition.  */
   if (DECL_EXTERNAL (decl))
     return true;
@@ -3152,8 +3149,8 @@ inline bool
 cgraph_edge::recursive_p (void)
 {
   cgraph_node *c = callee->ultimate_alias_target ();
-  if (caller->global.inlined_to)
-    return caller->global.inlined_to->decl == c->decl;
+  if (caller->inlined_to)
+    return caller->inlined_to->decl == c->decl;
   else
     return caller->decl == c->decl;
 }
@@ -3190,8 +3187,8 @@ cgraph_edge::binds_to_current_def_p ()
 inline int
 cgraph_edge::frequency ()
 {
-  return count.to_cgraph_frequency (caller->global.inlined_to
-				    ? caller->global.inlined_to->count
+  return count.to_cgraph_frequency (caller->inlined_to
+				    ? caller->inlined_to->count
				    : caller->count);
 }
 
@@ -3213,7 +3210,7 @@ inline void
 cgraph_node::mark_force_output (void)
 {
   force_output = 1;
-  gcc_checking_assert (!global.inlined_to);
+  gcc_checking_assert (!inlined_to);
 }
 
 /* Return true if function should be optimized for size.  */
diff -Nurp a/gcc/cgraphunit.c b/gcc/cgraphunit.c
--- a/gcc/cgraphunit.c	2020-04-30 15:14:04.592000000 +0800
+++ b/gcc/cgraphunit.c	2020-04-30 15:14:56.584000000 +0800
@@ -340,7 +340,10 @@ symbol_table::process_new_functions (voi
	     and splitting.  This is redundant for functions added late.
	     Just throw away whatever it did.  */
	  if (!summaried_computed)
-	    ipa_free_fn_summary ();
+	    {
+	      ipa_free_fn_summary ();
+	      ipa_free_size_summary ();
+	    }
	}
      else if (ipa_fn_summaries != NULL)
	compute_fn_summary (node, true);
@@ -389,7 +392,7 @@ cgraph_node::reset (void)
 
   /* Reset our data structures so we can analyze the function again.  */
   memset (&local, 0, sizeof (local));
-  memset (&global, 0, sizeof (global));
+  inlined_to = NULL;
   memset (&rtl, 0, sizeof (rtl));
   analyzed = false;
   definition = false;
@@ -1504,7 +1507,7 @@ mark_functions_to_output (void)
      if (node->analyzed
	  && !node->thunk.thunk_p
	  && !node->alias
-	  && !node->global.inlined_to
+	  && !node->inlined_to
	  && !TREE_ASM_WRITTEN (decl)
	  && !DECL_EXTERNAL (decl))
	{
@@ -1529,7 +1532,7 @@ mark_functions_to_output (void)
	{
	  /* We should've reclaimed all functions that are not needed.  */
	  if (flag_checking
-	      && !node->global.inlined_to
+	      && !node->inlined_to
	      && gimple_has_body_p (decl)
	      /* FIXME: in ltrans unit when offline copy is outside partition
		 but inline copies are inside partition, we can end up not
		 removing the body since we no longer
@@ -1542,7 +1545,7 @@ mark_functions_to_output (void)
	      node->debug ();
	      internal_error ("failed to reclaim unneeded function");
	    }
-	  gcc_assert (node->global.inlined_to
+	  gcc_assert (node->inlined_to
		      || !gimple_has_body_p (decl)
		      || node->in_other_partition
		      || node->clones
@@ -1557,7 +1560,7 @@ mark_functions_to_output (void)
	if (node->same_comdat_group && !node->process)
	  {
	    tree decl = node->decl;
-	    if (!node->global.inlined_to
+	    if (!node->inlined_to
		&& gimple_has_body_p (decl)
		/* FIXME: in an ltrans unit when the offline copy is outside a
		   partition but inline copies are inside a partition, we can
@@ -2118,7 +2121,7 @@ cgraph_node::assemble_thunks_and_aliases
 
   for (e = callers; e;)
     if (e->caller->thunk.thunk_p
-	&& !e->caller->global.inlined_to)
+	&& !e->caller->inlined_to)
       {
	cgraph_node *thunk = e->caller;
 
@@ -2155,7 +2158,7 @@ cgraph_node::expand (void)
   location_t saved_loc;
 
   /* We ought to not compile any inline clones.  */
-  gcc_assert (!global.inlined_to);
+  gcc_assert (!inlined_to);
 
   /* __RTL functions are compiled as soon as they are parsed, so don't
      do it again.  */
@@ -2707,7 +2710,7 @@ symbol_table::compile (void)
       bool error_found = false;
 
       FOR_EACH_DEFINED_FUNCTION (node)
-	if (node->global.inlined_to
+	if (node->inlined_to
	    || gimple_has_body_p (node->decl))
	  {
	    error_found = true;
diff -Nurp a/gcc/data-streamer.h b/gcc/data-streamer.h
--- a/gcc/data-streamer.h	2020-04-30 15:14:04.648000000 +0800
+++ b/gcc/data-streamer.h	2020-04-30 15:14:56.504000000 +0800
@@ -53,6 +53,7 @@ HOST_WIDE_INT bp_unpack_var_len_int (str
 void streamer_write_zero (struct output_block *);
 void streamer_write_uhwi (struct output_block *, unsigned HOST_WIDE_INT);
 void streamer_write_hwi (struct output_block *, HOST_WIDE_INT);
+void streamer_write_poly_uint64 (struct output_block *, poly_uint64);
 void streamer_write_gcov_count (struct output_block *, gcov_type);
 void streamer_write_string (struct output_block *, struct lto_output_stream *,
			    const char *, bool);
@@ -82,6 +83,7 @@ const char *bp_unpack_indexed_string (st
 const char *bp_unpack_string (struct data_in *, struct bitpack_d *);
 unsigned HOST_WIDE_INT streamer_read_uhwi (struct lto_input_block *);
 HOST_WIDE_INT streamer_read_hwi (struct lto_input_block *);
+poly_uint64 streamer_read_poly_uint64 (struct lto_input_block *);
 gcov_type streamer_read_gcov_count (struct lto_input_block *);
 wide_int streamer_read_wide_int (struct lto_input_block *);
 widest_int streamer_read_widest_int (struct lto_input_block *);
diff -Nurp a/gcc/data-streamer-in.c b/gcc/data-streamer-in.c
--- a/gcc/data-streamer-in.c	2020-04-30 15:14:04.628000000 +0800
+++ b/gcc/data-streamer-in.c	2020-04-30 15:14:56.504000000 +0800
@@ -175,6 +175,17 @@ streamer_read_hwi (struct lto_input_bloc
     }
 }
 
+/* Read a poly_uint64 from IB.  */
+
+poly_uint64
+streamer_read_poly_uint64 (class lto_input_block *ib)
+{
+  poly_uint64 res;
+  for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+    res.coeffs[i] = streamer_read_uhwi (ib);
+  return res;
+}
+
 /* Read gcov_type value from IB.  */
 
 gcov_type
diff -Nurp a/gcc/data-streamer-out.c b/gcc/data-streamer-out.c
--- a/gcc/data-streamer-out.c	2020-04-30 15:14:04.600000000 +0800
+++ b/gcc/data-streamer-out.c	2020-04-30 15:14:56.504000000 +0800
@@ -220,6 +220,15 @@ streamer_write_hwi (struct output_block
   streamer_write_hwi_stream (ob->main_stream, work);
 }
 
+/* Write a poly_uint64 value WORK to OB->main_stream.  */
+
+void
+streamer_write_poly_uint64 (struct output_block *ob, poly_uint64 work)
+{
+  for (int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+    streamer_write_uhwi_stream (ob->main_stream, work.coeffs[i]);
+}
+
 /* Write a gcov counter value WORK to OB->main_stream.  */
 
 void
diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi	2020-04-30 15:14:04.664000000 +0800
+++ b/gcc/doc/invoke.texi	2020-04-30 15:14:56.692000000 +0800
@@ -11836,6 +11836,13 @@ IPA-CP calculates its own score of cloni
 and performs those cloning opportunities with scores that exceed
 @option{ipa-cp-eval-threshold}.
 
+@item ipa-cp-max-recursive-depth
+Maximum depth of recursive cloning for self-recursive function.
+
+@item ipa-cp-min-recursive-probability
+Recursive cloning only when the probability of call being executed exceeds
+the parameter.
+
 @item ipa-cp-recursion-penalty
 Percentage penalty the recursive functions will receive when they
 are evaluated for cloning.
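The two parameters documented above can be tuned on the command line. A
usage sketch follows; the values shown are purely illustrative, not a
recommendation, and test-recursion.c is the hypothetical example from the
header of this file:

gcc -O3 -fipa-cp -fipa-cp-clone \
    --param ipa-cp-max-recursive-depth=8 \
    --param ipa-cp-min-recursive-probability=2 \
    test-recursion.c -o test-recursion

Setting ipa-cp-max-recursive-depth below 1 disables recursive cloning in the
backported propagate_vals_across_arith_jfunc, and
ipa-cp-min-recursive-probability makes the pass give up when the recursive
edge is executed with too low a frequency.
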
diff -Nurp a/gcc/gimple-fold.c b/gcc/gimple-fold.c
--- a/gcc/gimple-fold.c	2020-04-30 15:14:04.632000000 +0800
+++ b/gcc/gimple-fold.c	2020-04-30 15:14:56.584000000 +0800
@@ -135,7 +135,7 @@ can_refer_decl_in_current_unit_p (tree d
       if (!snode || !snode->definition)
	return false;
       node = dyn_cast <cgraph_node *> (snode);
-      return !node || !node->global.inlined_to;
+      return !node || !node->inlined_to;
     }
 
   /* We will later output the initializer, so we can refer to it.
@@ -184,7 +184,7 @@ can_refer_decl_in_current_unit_p (tree d
	      || (!snode->forced_by_abi && !snode->force_output))))
	return false;
      node = dyn_cast <cgraph_node *> (snode);
-      return !node || !node->global.inlined_to;
+      return !node || !node->inlined_to;
    }
 
 /* Create a temporary for TYPE for a statement STMT.  If the current function
diff -Nurp a/gcc/ipa.c b/gcc/ipa.c
--- a/gcc/ipa.c	2020-04-30 15:14:04.636000000 +0800
+++ b/gcc/ipa.c	2020-04-30 15:14:56.588000000 +0800
@@ -71,9 +71,9 @@ update_inlined_to_pointer (struct cgraph
 {
   struct cgraph_edge *e;
   for (e = node->callees; e; e = e->next_callee)
-    if (e->callee->global.inlined_to)
+    if (e->callee->inlined_to)
       {
-	e->callee->global.inlined_to = inlined_to;
+	e->callee->inlined_to = inlined_to;
	update_inlined_to_pointer (e->callee, inlined_to);
      }
 }
@@ -335,11 +335,11 @@ symbol_table::remove_unreachable_nodes (
      node->used_as_abstract_origin = false;
      node->indirect_call_target = false;
      if (node->definition
-	  && !node->global.inlined_to
+	  && !node->inlined_to
	  && !node->in_other_partition
	  && !node->can_remove_if_no_direct_calls_and_refs_p ())
	{
-	  gcc_assert (!node->global.inlined_to);
+	  gcc_assert (!node->inlined_to);
	  reachable.add (node);
	  enqueue_node (node, &first, &reachable);
	}
@@ -451,7 +451,7 @@ symbol_table::remove_unreachable_nodes (
	      /* When inline clone exists, mark body to be preserved so when removing
		 offline copy of the function we don't kill it.  */
-	      if (cnode->global.inlined_to)
+	      if (cnode->inlined_to)
		body_needed_for_clonning.add (cnode->decl);
 
	      /* For non-inline clones, force their origins to the boundary and ensure
@@ -560,11 +560,11 @@ symbol_table::remove_unreachable_nodes (
      to turn it into normal cone.  */
   FOR_EACH_FUNCTION (node)
     {
-      if (node->global.inlined_to
+      if (node->inlined_to
	  && !node->callers)
	{
	  gcc_assert (node->clones);
-	  node->global.inlined_to = NULL;
+	  node->inlined_to = NULL;
	  update_inlined_to_pointer (node, node);
	}
      node->aux = NULL;
@@ -1207,8 +1207,8 @@ propagate_single_user (varpool_node *vno
      struct cgraph_node *cnode = dyn_cast <cgraph_node *> (ref->referring);
      if (cnode)
	{
-	  if (cnode->global.inlined_to)
-	    cnode = cnode->global.inlined_to;
+	  if (cnode->inlined_to)
+	    cnode = cnode->inlined_to;
	  if (!function)
	    function = cnode;
	  else if (function != cnode)
diff -Nurp a/gcc/ipa-comdats.c b/gcc/ipa-comdats.c
--- a/gcc/ipa-comdats.c	2020-04-30 15:14:04.612000000 +0800
+++ b/gcc/ipa-comdats.c	2020-04-30 15:14:56.584000000 +0800
@@ -98,8 +98,8 @@ propagate_comdat_group (struct symtab_no
 
   if (cgraph_node * cn = dyn_cast <cgraph_node *> (symbol2))
     {
-      if (cn->global.inlined_to)
-	symbol2 = cn->global.inlined_to;
+      if (cn->inlined_to)
+	symbol2 = cn->inlined_to;
     }
 
   /* The actual merge operation.  */
@@ -133,8 +133,8 @@ propagate_comdat_group (struct symtab_no
 
	  /* If we see inline clone, its comdat group actually
	     corresponds to the comdat group of the function it
	     is inlined to.  */
-	  if (cn->global.inlined_to)
-	    symbol2 = cn->global.inlined_to;
+	  if (cn->inlined_to)
+	    symbol2 = cn->inlined_to;
	}
 
      /* The actual merge operation.  */
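The largest part of the change follows, in gcc/ipa-cp.c. Besides converting
the pass to the class-based summaries, it teaches the lattice propagation to
evaluate IPA_JF_LOAD_AGG jump functions, i.e. constants loaded from
aggregates passed by reference. A sketch of source code this helps is given
below; it is a hypothetical example, not part of the patch set:

/* With the by-ref patches, IPA-CP can track n->val through the aggregate
   and, combined with the recursive versioning above, may specialize
   walk () for val = 0, 1, 2, ...  */
struct node { int val; };

static int __attribute__ ((noinline))
walk (struct node *n)
{
  if (n->val >= 8)
    return n->val;
  struct node next = { n->val + 1 };  /* IPA_JF_LOAD_AGG: a load of n->val
					 combined with the constant 1.  */
  return walk (&next);
}

int
start (void)
{
  struct node first = { 0 };
  return walk (&first);
}
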
diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c
--- a/gcc/ipa-cp.c	2020-04-30 15:14:04.592000000 +0800
+++ b/gcc/ipa-cp.c	2020-04-30 15:14:56.700000000 +0800
@@ -229,7 +229,9 @@ public:
   inline bool set_contains_variable ();
   bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
-		  int src_idx = 0, HOST_WIDE_INT offset = -1);
+		  int src_idx = 0, HOST_WIDE_INT offset = -1,
+		  ipcp_value<valtype> **val_p = NULL,
+		  bool unlimited = false);
   void print (FILE * f, bool dump_sources, bool dump_benefits);
 };
 
@@ -381,8 +383,8 @@ static hash_map
 /* Return the param lattices structure corresponding to the Ith formal
    parameter of the function described by INFO.  */
 
-static inline struct ipcp_param_lattices *
-ipa_get_parm_lattices (struct ipa_node_params *info, int i)
+static inline class ipcp_param_lattices *
+ipa_get_parm_lattices (class ipa_node_params *info, int i)
 {
   gcc_assert (i >= 0 && i < ipa_get_param_count (info));
   gcc_checking_assert (!info->ipcp_orig_node);
@@ -393,18 +395,18 @@
 /* Return the lattice corresponding to the scalar value of the Ith formal
    parameter of the function described by INFO.  */
 static inline ipcp_lattice<tree> *
-ipa_get_scalar_lat (struct ipa_node_params *info, int i)
+ipa_get_scalar_lat (class ipa_node_params *info, int i)
 {
-  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
   return &plats->itself;
 }
 
 /* Return the lattice corresponding to the scalar value of the Ith formal
    parameter of the function described by INFO.  */
 static inline ipcp_lattice<ipa_polymorphic_call_context> *
-ipa_get_poly_ctx_lat (struct ipa_node_params *info, int i)
+ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
 {
-  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
   return &plats->ctxlat;
 }
 
@@ -539,7 +541,7 @@ print_all_lattices (FILE * f, bool dump_
   fprintf (f, "\nLattices:\n");
   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
     {
-      struct ipa_node_params *info;
+      class ipa_node_params *info;
 
       info = IPA_NODE_REF (node);
      /* Skip constprop clones since we don't make lattices for them.  */
@@ -550,7 +552,7 @@ print_all_lattices (FILE * f, bool dump_
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
-	  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
@@ -585,7 +587,7 @@
 
 static void
 determine_versionability (struct cgraph_node *node,
-			  struct ipa_node_params *info)
+			  class ipa_node_params *info)
 {
   const char *reason = NULL;
 
@@ -656,7 +658,7 @@ determine_versionability (struct cgraph_
 static bool
 ipcp_versionable_function_p (struct cgraph_node *node)
 {
-  return IPA_NODE_REF (node)->versionable;
+  return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable;
 }
 
 /* Structure holding accumulated information about callers of a node.  */
@@ -731,7 +733,7 @@ ipcp_cloning_candidate_p (struct cgraph_
   init_caller_stats (&stats);
   node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
					    false);
-  if (ipa_fn_summaries->get (node)->self_size < stats.n_calls)
+  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
     {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
@@ -806,23 +808,39 @@ public:
    {}
 };
 
+/* Skip edges from and to nodes without ipa_cp enabled.
+   Ignore not available symbols.  */
+
+static bool
+ignore_edge_p (cgraph_edge *e)
+{
+  enum availability avail;
+  cgraph_node *ultimate_target
+    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
+
+  return (avail <= AVAIL_INTERPOSABLE
+	  || !opt_for_fn (e->caller->decl, flag_ipa_cp)
+	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
+}
+
 /* Allocate the arrays in TOPO and topologically sort the nodes into order.  */
 
 static void
-build_toporder_info (struct ipa_topo_info *topo)
+build_toporder_info (class ipa_topo_info *topo)
 {
   topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
   topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 
   gcc_checking_assert (topo->stack_top == 0);
-  topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL);
+  topo->nnodes = ipa_reduced_postorder (topo->order, true,
+					ignore_edge_p);
 }
 
 /* Free information about strongly connected components and the arrays in
    TOPO.  */
 
 static void
-free_toporder_info (struct ipa_topo_info *topo)
+free_toporder_info (class ipa_topo_info *topo)
 {
   ipa_free_postorder_info ();
   free (topo->order);
@@ -832,9 +850,9 @@ free_toporder_info (struct ipa_topo_info
 /* Add NODE to the stack in TOPO, unless it is already there.  */
 
 static inline void
-push_node_to_stack (struct ipa_topo_info *topo, struct cgraph_node *node)
+push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
 {
-  struct ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (node);
   if (info->node_enqueued)
     return;
   info->node_enqueued = 1;
@@ -845,7 +863,7 @@ push_node_to_stack (struct ipa_topo_info
    is empty.  */
 
 static struct cgraph_node *
-pop_node_from_stack (struct ipa_topo_info *topo)
+pop_node_from_stack (class ipa_topo_info *topo)
 {
   if (topo->stack_top)
     {
@@ -887,7 +905,7 @@ ipcp_lattice<valtype>::set_contains_vari
    not previously set as such.  */
 
 static inline bool
-set_agg_lats_to_bottom (struct ipcp_param_lattices *plats)
+set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
 {
   bool ret = !plats->aggs_bottom;
   plats->aggs_bottom = true;
@@ -898,7 +916,7 @@ set_agg_lats_to_bottom (struct ipcp_para
    return true if they were not previously marked as such.  */
 
 static inline bool
-set_agg_lats_contain_variable (struct ipcp_param_lattices *plats)
+set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
 {
   bool ret = !plats->aggs_contain_variable;
   plats->aggs_contain_variable = true;
@@ -1108,7 +1126,7 @@ ipcp_bits_lattice::meet_with (ipcp_bits_
    return true is any of them has not been marked as such so far.  */
 
 static inline bool
-set_all_contains_variable (struct ipcp_param_lattices *plats)
+set_all_contains_variable (class ipcp_param_lattices *plats)
 {
   bool ret;
   ret = plats->itself.set_contains_variable ();
@@ -1158,7 +1176,7 @@ set_single_call_flag (cgraph_node *node,
 static void
 initialize_node_lattices (struct cgraph_node *node)
 {
-  struct ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (node);
   struct cgraph_edge *ie;
   bool disable = false, variable = false;
   int i;
@@ -1188,7 +1206,7 @@ initialize_node_lattices (struct cgraph_
 
   for (i = 0; i < ipa_get_param_count (info); i++)
     {
-      struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      plats->m_value_range.init ();
     }
 
@@ -1196,7 +1214,7 @@ initialize_node_lattices (struct cgraph_
    {
      for (i = 0; i < ipa_get_param_count (info); i++)
	{
-	  struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
+	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (disable)
	    {
	      plats->itself.set_to_bottom ();
@@ -1224,23 +1242,23 @@
    }
 }
 
-/* Return the result of a (possibly arithmetic) pass through jump function
-   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
-   to which the result is passed.  Return NULL_TREE if that cannot be
-   determined or be considered an interprocedural invariant.  */
+/* Return the result of a (possibly arithmetic) operation on the constant
+   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
+   the type of the parameter to which the result is passed.  Return
+   NULL_TREE if that cannot be determined or be considered an
+   interprocedural invariant.  */
 
 static tree
-ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
-				tree res_type)
+ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
+			 tree res_type)
 {
   tree res;
 
-  if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
+  if (opcode == NOP_EXPR)
     return input;
   if (!is_gimple_ip_invariant (input))
     return NULL_TREE;
 
-  tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
   if (!res_type)
     {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
@@ -1254,8 +1272,7 @@
   if (TREE_CODE_CLASS (opcode) == tcc_unary)
     res = fold_unary (opcode, res_type, input);
   else
-    res = fold_binary (opcode, res_type, input,
-		       ipa_get_jf_pass_through_operand (jfunc));
+    res = fold_binary (opcode, res_type, input, operand);
 
   if (res && !is_gimple_ip_invariant (res))
     return NULL_TREE;
@@ -1263,6 +1280,21 @@
   return res;
 }
 
+/* Return the result of a (possibly arithmetic) pass through jump function
+   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
+   to which the result is passed.  Return NULL_TREE if that cannot be
+   determined or be considered an interprocedural invariant.  */
+
+static tree
+ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
+				tree res_type)
+{
+  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
+				  input,
+				  ipa_get_jf_pass_through_operand (jfunc),
+				  res_type);
+}
+
 /* Return the result of an ancestor jump function JFUNC on the constant value
    INPUT.  Return NULL_TREE if that cannot be determined.  */
 
@@ -1289,7 +1321,7 @@ ipa_get_jf_ancestor_result (struct ipa_j
    passed.  */
 
 tree
-ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc,
+ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
 {
   if (jfunc->type == IPA_JF_CONST)
@@ -1396,6 +1428,146 @@ ipa_context_from_jfunc (ipa_node_params
   return ctx;
 }
 
+/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
+   parameter with the given INDEX.  */
+
+static tree
+get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
+		     int index)
+{
+  struct ipa_agg_replacement_value *aggval;
+
+  aggval = ipa_get_agg_replacements_for_node (node);
+  while (aggval)
+    {
+      if (aggval->offset == offset
+	  && aggval->index == index)
+	return aggval->value;
+      aggval = aggval->next;
+    }
+  return NULL_TREE;
+}
+
+/* Determine whether ITEM, jump function for an aggregate part, evaluates to a
+   single known constant value and if so, return it.  Otherwise return NULL.
+   NODE and INFO describes the caller node or the one it is inlined to, and
+   its related info.  */
+
+static tree
+ipa_agg_value_from_node (class ipa_node_params *info,
+			 struct cgraph_node *node,
+			 struct ipa_agg_jf_item *item)
+{
+  tree value = NULL_TREE;
+  int src_idx;
+
+  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
+    return NULL_TREE;
+
+  if (item->jftype == IPA_JF_CONST)
+    return item->value.constant;
+
+  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
+		       || item->jftype == IPA_JF_LOAD_AGG);
+
+  src_idx = item->value.pass_through.formal_id;
+
+  if (info->ipcp_orig_node)
+    {
+      if (item->jftype == IPA_JF_PASS_THROUGH)
+	value = info->known_csts[src_idx];
+      else
+	value = get_clone_agg_value (node, item->value.load_agg.offset,
+				     src_idx);
+    }
+  else if (info->lattices)
+    {
+      class ipcp_param_lattices *src_plats
+	= ipa_get_parm_lattices (info, src_idx);
+
+      if (item->jftype == IPA_JF_PASS_THROUGH)
+	{
+	  struct ipcp_lattice<tree> *lat = &src_plats->itself;
+
+	  if (!lat->is_single_const ())
+	    return NULL_TREE;
+
+	  value = lat->values->value;
+	}
+      else if (src_plats->aggs
+	       && !src_plats->aggs_bottom
+	       && !src_plats->aggs_contain_variable
+	       && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
+	{
+	  struct ipcp_agg_lattice *aglat;
+
+	  for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
+	    {
+	      if (aglat->offset > item->value.load_agg.offset)
+		break;
+
+	      if (aglat->offset == item->value.load_agg.offset)
+		{
+		  if (aglat->is_single_const ())
+		    value = aglat->values->value;
+		  break;
+		}
+	    }
+	}
+    }
+
+  if (!value)
+    return NULL_TREE;
+
+  if (item->jftype == IPA_JF_LOAD_AGG)
+    {
+      tree load_type = item->value.load_agg.type;
+      tree value_type = TREE_TYPE (value);
+
+      /* Ensure value type is compatible with load type.  */
+      if (!useless_type_conversion_p (load_type, value_type))
+	return NULL_TREE;
+    }
+
+  return ipa_get_jf_arith_result (item->value.pass_through.operation,
+				  value,
+				  item->value.pass_through.operand,
+				  item->type);
+}
+
+/* Determine whether AGG_JFUNC evaluates to a set of known constant value for
+   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
+   and INFO describes the caller node or the one it is inlined to, and its
+   related info.  */
+
+struct ipa_agg_value_set
+ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
+			      struct ipa_agg_jump_function *agg_jfunc)
+{
+  struct ipa_agg_value_set agg;
+  struct ipa_agg_jf_item *item;
+  int i;
+
+  agg.items = vNULL;
+  agg.by_ref = agg_jfunc->by_ref;
+
+  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
+    {
+      tree value = ipa_agg_value_from_node (info, node, item);
+
+      if (value)
+	{
+	  struct ipa_agg_value value_item;
+
+	  value_item.offset = item->offset;
+	  value_item.value = value;
+
+	  agg.items.safe_push (value_item);
+	}
+    }
+  return agg;
+}
+
 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
    bottom, not containing a variable component and without any known value at
    the same time.  */
@@ -1407,7 +1579,9 @@ ipcp_verify_propagated_values (void)
 
   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
     {
-      struct ipa_node_params *info = IPA_NODE_REF (node);
+      class ipa_node_params *info = IPA_NODE_REF (node);
+      if (!opt_for_fn (node->decl, flag_ipa_cp))
+	continue;
      int i, count = ipa_get_param_count (info);
 
      for (i = 0; i < count; i++)
@@ -1516,22 +1690,32 @@ allocate_and_init_ipcp_value (ipa_polymo
 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
    SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
    meaning.  OFFSET -1 means the source is scalar and not a part of an
-   aggregate.  */
+   aggregate.  If non-NULL, VAL_P records address of existing or newly added
+   ipcp_value.  UNLIMITED means whether value count should not exceed the limit
+   given by PARAM_IPA_CP_VALUE_LIST_SIZE.  */
 
 template <typename valtype>
 bool
 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
				  ipcp_value<valtype> *src_val,
-				  int src_idx, HOST_WIDE_INT offset)
+				  int src_idx, HOST_WIDE_INT offset,
+				  ipcp_value<valtype> **val_p,
+				  bool unlimited)
 {
-  ipcp_value<valtype> *val;
+  ipcp_value<valtype> *val, *last_val = NULL;
+
+  if (val_p)
+    *val_p = NULL;
 
   if (bottom)
     return false;
 
-  for (val = values; val; val = val->next)
+  for (val = values; val; last_val = val, val = val->next)
    if (values_equal_for_ipcp_p (val->value, newval))
      {
+	if (val_p)
+	  *val_p = val;
+
	if (ipa_edge_within_scc (cs))
	  {
	    ipcp_value_source *s;
@@ -1546,7 +1730,7 @@ ipcp_lattice<valtype>::add_value (valtyp
	return false;
      }
 
-  if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
+  if (!unlimited && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
    {
      /* We can only free sources, not the values themselves, because sources
	 of other values in this SCC might point to them.  */
@@ -1559,7 +1743,6 @@ ipcp_lattice<valtype>::add_value (valtyp
	      ipcp_sources_pool.remove ((ipcp_value_source*)src);
	    }
	}
-      values = NULL;
      return set_to_bottom ();
    }
 
@@ -1567,41 +1750,177 @@ ipcp_lattice<valtype>::add_value (valtyp
   values_count++;
   val = allocate_and_init_ipcp_value (newval);
   val->add_source (cs, src_val, src_idx, offset);
-  val->next = values;
-  values = val;
+  val->next = NULL;
+
+  /* Add the new value to end of value list, which can reduce iterations
+     of propagation stage for recursive function.  */
+  if (last_val)
+    last_val->next = val;
+  else
+    values = val;
+
+  if (val_p)
+    *val_p = val;
+
   return true;
 }
 
-/* Propagate values through a pass-through jump function JFUNC associated with
-   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
-   is the index of the source parameter.  PARM_TYPE is the type of the
-   parameter to which the result is passed.  */
+/* Return true, if a ipcp_value VAL is orginated from parameter value of
+   self-feeding recursive function by applying non-passthrough arithmetic
+   transformation.  */
 
 static bool
-propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
-				    ipcp_lattice<tree> *src_lat,
-				    ipcp_lattice<tree> *dest_lat, int src_idx,
-				    tree parm_type)
+self_recursively_generated_p (ipcp_value<tree> *val)
+{
+  class ipa_node_params *info = NULL;
+
+  for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
+    {
+      cgraph_edge *cs = src->cs;
+
+      if (!src->val || cs->caller != cs->callee->function_symbol ()
+	  || src->val == val)
+	return false;
+
+      if (!info)
+	info = IPA_NODE_REF (cs->caller);
+
+      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
+								src->index);
+      ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
+						      : plats->aggs;
+      ipcp_value<tree> *src_val;
+
+      for (src_val = src_lat->values; src_val; src_val = src_val->next)
+	if (src_val == val)
+	  break;
+
+      if (!src_val)
+	return false;
+    }
+
+  return true;
+}
+
+/* A helper function that returns result of operation specified by OPCODE on
+   the value of SRC_VAL.  If non-NULL, OPND1_TYPE is expected type for the
+   value of SRC_VAL.  If the operation is binary, OPND2 is a constant value
+   acting as its second operand.  If non-NULL, RES_TYPE is expected type of
+   the result.  */
+
+static tree
+get_val_across_arith_op (enum tree_code opcode,
+			 tree opnd1_type,
+			 tree opnd2,
+			 ipcp_value<tree> *src_val,
+			 tree res_type)
+{
+  tree opnd1 = src_val->value;
+
+  /* Skip source values that is incompatible with specified type.  */
+  if (opnd1_type
+      && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
+    return NULL_TREE;
+
+  return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
+}
+
+/* Propagate values through an arithmetic transformation described by a jump
+   function associated with edge CS, taking values from SRC_LAT and putting
+   them into DEST_LAT.  OPND1_TYPE is expected type for the values in SRC_LAT.
+   OPND2 is a constant value if transformation is a binary operation.
+   SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
+   a part of the aggregate.  SRC_IDX is the index of the source parameter.
+   RES_TYPE is the value type of result being propagated into.  Return true if
+   DEST_LAT changed.  */
+
+static bool
+propagate_vals_across_arith_jfunc (cgraph_edge *cs,
+				   enum tree_code opcode,
+				   tree opnd1_type,
+				   tree opnd2,
+				   ipcp_lattice<tree> *src_lat,
+				   ipcp_lattice<tree> *dest_lat,
+				   HOST_WIDE_INT src_offset,
+				   int src_idx,
+				   tree res_type)
 {
   ipcp_value<tree> *src_val;
   bool ret = false;
 
-  /* Do not create new values when propagating within an SCC because if there
-     are arithmetic functions with circular dependencies, there is infinite
-     number of them and we would just make lattices bottom.  If this condition
-     is ever relaxed we have to detect self-feeding recursive calls in
-     cgraph_edge_brings_value_p in a smarter way.  */
-  if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
-      && ipa_edge_within_scc (cs))
-    ret = dest_lat->set_contains_variable ();
+  /* Due to circular dependencies, propagating within an SCC through arithmetic
+     transformation would create infinite number of values.  But for
+     self-feeding recursive function, we could allow propagation in a limited
+     count, and this can enable a simple kind of recursive function versioning.
+     For other scenario, we would just make lattices bottom.  */
+  if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
+    {
+      int i;
+
+      if (src_lat != dest_lat || PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH) < 1)
+	return dest_lat->set_contains_variable ();
+
+      /* No benefit if recursive execution is in low probability.  */
+      if (cs->sreal_frequency () * 100
+	  <= ((sreal) 1) * PARAM_VALUE(PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY))
+	return dest_lat->set_contains_variable ();
+
+      auto_vec<ipcp_value<tree> *, 8> val_seeds;
+
+      for (src_val = src_lat->values; src_val; src_val = src_val->next)
+	{
+	  /* Now we do not use self-recursively generated value as propagation
+	     source, this is absolutely conservative, but could avoid explosion
+	     of lattice's value space, especially when one recursive function
+	     calls another recursive.  */
+	  if (self_recursively_generated_p (src_val))
+	    {
+	      ipcp_value_source<tree> *s;
+
+	      /* If the lattice has already been propagated for the call site,
+		 no need to do that again.  */
+	      for (s = src_val->sources; s; s = s->next)
+		if (s->cs == cs)
+		  return dest_lat->set_contains_variable ();
+	    }
+	  else
+	    val_seeds.safe_push (src_val);
+	}
+
+      /* Recursively generate lattice values with a limited count.  */
+      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
+	{
+	  for (int j = 1; j < PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH); j++)
+	    {
+	      tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+						     src_val, res_type);
+	      if (!cstval)
+		break;
+
+	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+					  src_offset, &src_val, true);
+	      gcc_checking_assert (src_val);
+	    }
+	}
+      ret |= dest_lat->set_contains_variable ();
+    }
   else
     for (src_val = src_lat->values; src_val; src_val = src_val->next)
       {
-	tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value,
-						      parm_type);
+	/* Now we do not use self-recursively generated value as propagation
+	   source, otherwise it is easy to make value space of normal lattice
+	   overflow.  */
+	if (self_recursively_generated_p (src_val))
+	  {
+	    ret |= dest_lat->set_contains_variable ();
+	    continue;
+	  }
 
+	tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
+					       src_val, res_type);
	if (cstval)
-	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx);
+	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+				      src_offset);
	else
	  ret |= dest_lat->set_contains_variable ();
      }
@@ -1609,6 +1928,24 @@
   return ret;
 }
 
+/* Propagate values through a pass-through jump function JFUNC associated with
+   edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
+   is the index of the source parameter.  PARM_TYPE is the type of the
+   parameter to which the result is passed.  */
+
+static bool
+propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
+				    ipcp_lattice<tree> *src_lat,
+				    ipcp_lattice<tree> *dest_lat, int src_idx,
+				    tree parm_type)
+{
+  return propagate_vals_across_arith_jfunc (cs,
+				ipa_get_jf_pass_through_operation (jfunc),
+				NULL_TREE,
+				ipa_get_jf_pass_through_operand (jfunc),
+				src_lat, dest_lat, -1, src_idx, parm_type);
+}
+
 /* Propagate values through an ancestor jump function JFUNC associated with
    edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
    is the index of the source parameter.  */
@@ -1659,7 +1996,7 @@ propagate_scalar_across_jump_function (s
   else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
     {
-      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      ipcp_lattice<tree> *src_lat;
      int src_idx;
      bool ret;
@@ -1721,7 +2058,7 @@ propagate_context_across_jump_function (
   if (jfunc->type == IPA_JF_PASS_THROUGH
       || jfunc->type == IPA_JF_ANCESTOR)
     {
-      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx;
      ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
@@ -1769,7 +2106,6 @@ propagate_context_across_jump_function (
	      added_sth = true;
	    }
	}
-
     }
 
  prop_fail:
@@ -1797,7 +2133,7 @@ propagate_bits_across_jump_function (cgr
 
   enum availability availability;
   cgraph_node *callee = cs->callee->function_symbol (&availability);
-  struct ipa_node_params *callee_info = IPA_NODE_REF (callee);
+  class ipa_node_params *callee_info = IPA_NODE_REF (callee);
   tree parm_type = ipa_get_type (callee_info, idx);
 
   /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
@@ -1820,7 +2156,7 @@ propagate_bits_across_jump_function (cgr
   if (jfunc->type == IPA_JF_PASS_THROUGH
       || jfunc->type == IPA_JF_ANCESTOR)
     {
-      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      tree operand = NULL_TREE;
      enum tree_code code;
      unsigned src_idx;
@@ -1840,7 +2176,7 @@ propagate_bits_across_jump_function (cgr
	  operand = build_int_cstu (size_type_node, offset);
	}
 
-      struct ipcp_param_lattices *src_lats
+      class ipcp_param_lattices *src_lats
	= ipa_get_parm_lattices (caller_info, src_idx);
 
      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
@@ -1894,7 +2230,7 @@ ipa_vr_operation_and_type_effects (value
 
 static bool
 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
-				   struct ipcp_param_lattices *dest_plats,
+				   class ipcp_param_lattices *dest_plats,
				   tree param_type)
 {
   ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
@@ -1913,10 +2249,10 @@ propagate_vr_across_jump_function (cgrap
 
      if (TREE_CODE_CLASS (operation) == tcc_unary)
	{
-	  struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+	  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
	  int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
	  tree operand_type = ipa_get_type (caller_info, src_idx);
-	  struct ipcp_param_lattices *src_lats
+	  class ipcp_param_lattices *src_lats
	    = ipa_get_parm_lattices (caller_info, src_idx);
 
	  if (src_lats->m_value_range.bottom_p ())
@@ -1959,7 +2295,7 @@ propagate_vr_across_jump_function (cgrap
    aggs_by_ref to NEW_AGGS_BY_REF.  */
 
 static bool
-set_check_aggs_by_ref (struct ipcp_param_lattices *dest_plats,
+set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
		       bool new_aggs_by_ref)
 {
   if (dest_plats->aggs)
@@ -1986,7 +2322,7 @@ set_check_aggs_by_ref (struct ipcp_param
    true.  */
 
 static bool
-merge_agg_lats_step (struct ipcp_param_lattices *dest_plats,
+merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
		     HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
		     struct ipcp_agg_lattice ***aglat,
		     bool pre_existing, bool *change)
@@ -2064,8 +2400,8 @@ set_chain_of_aglats_contains_variable (s
 
 static bool
 merge_aggregate_lattices (struct cgraph_edge *cs,
-			  struct ipcp_param_lattices *dest_plats,
-			  struct ipcp_param_lattices *src_plats,
+			  class ipcp_param_lattices *dest_plats,
+			  class ipcp_param_lattices *src_plats,
			  int src_idx, HOST_WIDE_INT offset_delta)
 {
   bool pre_existing = dest_plats->aggs != NULL;
@@ -2119,7 +2455,7 @@ merge_aggregate_lattices (struct cgraph_
    rules about propagating values passed by reference.  */
 
 static bool
-agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats,
+agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
				struct ipa_jump_func *jfunc)
 {
   return src_plats->aggs
@@ -2127,13 +2463,92 @@ agg_pass_through_permissible_p (struct i
	  || ipa_get_jf_pass_through_agg_preserved (jfunc));
 }
 
+/* Propagate values through ITEM, jump function for a part of an aggregate,
+   into corresponding aggregate lattice AGLAT.  CS is the call graph edge
+   associated with the jump function.  Return true if AGLAT changed in any
+   way.  */
+
+static bool
+propagate_aggregate_lattice (struct cgraph_edge *cs,
+			     struct ipa_agg_jf_item *item,
+			     struct ipcp_agg_lattice *aglat)
+{
+  class ipa_node_params *caller_info;
+  class ipcp_param_lattices *src_plats;
+  struct ipcp_lattice<tree> *src_lat;
+  HOST_WIDE_INT src_offset;
+  int src_idx;
+  tree load_type;
+  bool ret;
+
+  if (item->jftype == IPA_JF_CONST)
+    {
+      tree value = item->value.constant;
+
+      gcc_checking_assert (is_gimple_ip_invariant (value));
+      return aglat->add_value (value, cs, NULL, 0);
+    }
+
+  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
+		       || item->jftype == IPA_JF_LOAD_AGG);
+
+  caller_info = IPA_NODE_REF (cs->caller);
+  src_idx = item->value.pass_through.formal_id;
+  src_plats = ipa_get_parm_lattices (caller_info, src_idx);
+
+  if (item->jftype == IPA_JF_PASS_THROUGH)
+    {
+      load_type = NULL_TREE;
+      src_lat = &src_plats->itself;
+      src_offset = -1;
+    }
+  else
+    {
+      HOST_WIDE_INT load_offset = item->value.load_agg.offset;
+      struct ipcp_agg_lattice *src_aglat;
+
+      for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
+	if (src_aglat->offset >= load_offset)
+	  break;
+
+      load_type = item->value.load_agg.type;
+      if (!src_aglat
+	  || src_aglat->offset > load_offset
+	  || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
+	  || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
+	return aglat->set_contains_variable ();
+
+      src_lat = src_aglat;
+      src_offset = load_offset;
+    }
+
+  if (src_lat->bottom
+      || (!ipcp_versionable_function_p (cs->caller)
+	  && !src_lat->is_single_const ()))
+    return aglat->set_contains_variable ();
+
+  ret = propagate_vals_across_arith_jfunc (cs,
+					   item->value.pass_through.operation,
+					   load_type,
+					   item->value.pass_through.operand,
+					   src_lat, aglat,
+					   src_offset,
+					   src_idx,
+					   item->type);
+
+  if (src_lat->contains_variable)
+    ret |= aglat->set_contains_variable ();
+
+  return ret;
+}
+
 /* Propagate scalar values across jump function JFUNC that is associated with
    edge CS and put the values into DEST_LAT.  */
 
 static bool
 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
				     struct ipa_jump_func *jfunc,
-				     struct ipcp_param_lattices *dest_plats)
+				     class ipcp_param_lattices *dest_plats)
 {
   bool ret = false;
 
@@ -2143,9 +2558,9 @@ propagate_aggs_across_jump_function (str
   if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
-      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
-      struct ipcp_param_lattices *src_plats;
+      class ipcp_param_lattices *src_plats;
 
      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (agg_pass_through_permissible_p (src_plats, jfunc))
@@ -2162,9 +2577,9 @@ propagate_aggs_across_jump_function (str
   else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
-      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+      class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
      int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
-      struct ipcp_param_lattices *src_plats;
+      class ipcp_param_lattices *src_plats;
 
      src_plats = ipa_get_parm_lattices (caller_info, src_idx);
      if (src_plats->aggs && src_plats->aggs_by_ref)
@@ -2194,15 +2609,14 @@ propagate_aggs_across_jump_function (str
	{
	  HOST_WIDE_INT val_size;
 
-	  if (item->offset < 0)
+	  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
	    continue;
-	  gcc_checking_assert (is_gimple_ip_invariant (item->value));
-	  val_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (item->value)));
+	  val_size = tree_to_shwi (TYPE_SIZE (item->type));
 
	  if (merge_agg_lats_step (dest_plats, item->offset, val_size,
				   &aglat, pre_existing, &ret))
	    {
-	      ret |= (*aglat)->add_value (item->value, cs, NULL, 0, 0);
+	      ret |= propagate_aggregate_lattice (cs, item, *aglat);
	      aglat = &(*aglat)->next;
	    }
	  else if (dest_plats->aggs_bottom)
@@ -2235,10 +2649,10 @@ call_passes_through_thunk_p (cgraph_edge
 static bool
 propagate_constants_across_call (struct cgraph_edge *cs)
 {
-  struct ipa_node_params *callee_info;
+  class ipa_node_params *callee_info;
   enum availability availability;
   cgraph_node *callee;
-  struct ipa_edge_args *args;
+  class ipa_edge_args *args;
   bool ret = false;
   int i, args_count, parms_count;
 
@@ -2247,12 +2661,21 @@ propagate_constants_across_call (struct
     return false;
   gcc_checking_assert (callee->has_gimple_body_p ());
   callee_info = IPA_NODE_REF (callee);
+  if (!callee_info)
+    return false;
 
   args = IPA_EDGE_REF (cs);
-  args_count = ipa_get_cs_argument_count (args);
   parms_count = ipa_get_param_count (callee_info);
   if (parms_count == 0)
     return false;
+  if (!args)
+    {
+      for (i = 0; i < parms_count; i++)
+	ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
+								 i));
+      return ret;
+    }
+  args_count = ipa_get_cs_argument_count (args);
 
   /* If this call goes through a thunk we must not propagate to the first (0th)
      parameter.  However, we might need to uncover a thunk from below a series
@@ -2269,7 +2692,7 @@ propagate_constants_across_call (struct
   for (; (i < args_count) && (i < parms_count); i++)
     {
      struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
-      struct ipcp_param_lattices *dest_plats;
+      class ipcp_param_lattices *dest_plats;
      tree param_type = ipa_get_type (callee_info, i);
 
      dest_plats = ipa_get_parm_lattices (callee_info, i);
@@ -2308,7 +2731,7 @@ static tree
 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
				vec<tree> known_csts,
				vec<ipa_polymorphic_call_context> known_contexts,
-				vec<ipa_agg_jump_function_p> known_aggs,
+				vec<ipa_agg_value_set> known_aggs,
				struct ipa_agg_replacement_value *agg_reps,
				bool *speculative)
 {
@@ -2346,9 +2769,9 @@ ipa_get_indirect_edge_target_1 (struct c
	}
      if (!t)
	{
-	  struct ipa_agg_jump_function *agg;
+	  struct ipa_agg_value_set *agg;
	  if (known_aggs.length () > (unsigned int) param_index)
-	    agg = known_aggs[param_index];
+	    agg = &known_aggs[param_index];
	  else
	    agg = NULL;
	  bool from_global_constant;
@@ -2402,8 +2825,7 @@ ipa_get_indirect_edge_target_1 (struct c
   if (!t && known_aggs.length () > (unsigned int) param_index
       && !ie->indirect_info->by_ref)
     {
-      struct ipa_agg_jump_function *agg;
-      agg = known_aggs[param_index];
+      struct ipa_agg_value_set *agg = &known_aggs[param_index];
      t = ipa_find_agg_cst_for_param (agg, known_csts[param_index],
				      ie->indirect_info->offset, true);
    }
@@ -2526,7 +2948,7 @@ tree
 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
			      vec<tree> known_csts,
			      vec<ipa_polymorphic_call_context> known_contexts,
-			      vec<ipa_agg_jump_function_p> known_aggs,
+			      vec<ipa_agg_value_set> known_aggs,
			      bool *speculative)
 {
   return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
@@ -2540,7 +2962,7 @@ static int
 devirtualization_time_bonus (struct cgraph_node *node,
			     vec<tree> known_csts,
			     vec<ipa_polymorphic_call_context> known_contexts,
-			     vec<ipa_agg_jump_function_p> known_aggs)
+			     vec<ipa_agg_value_set> known_aggs)
 {
   struct cgraph_edge *ie;
   int res = 0;
@@ -2548,7 +2970,7 @@ devirtualization_time_bonus (struct cgra
   for (ie = node->indirect_calls; ie; ie = ie->next_callee)
     {
      struct cgraph_node *callee;
-      struct ipa_fn_summary *isummary;
+      class ipa_fn_summary *isummary;
      enum availability avail;
      tree target;
      bool speculative;
@@ -2570,13 +2992,14 @@ devirtualization_time_bonus (struct cgra
      if (!isummary || !isummary->inlinable)
	continue;
 
+      int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
	 integrating into the cost metrics, at lest in some very basic way.  */
-      if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4)
+      if (size <= MAX_INLINE_INSNS_AUTO / 4)
	res += 31 / ((int)speculative + 1);
-      else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2)
+      else if (size <= MAX_INLINE_INSNS_AUTO / 2)
	res += 15 / ((int)speculative + 1);
-      else if (isummary->size <= MAX_INLINE_INSNS_AUTO
+      else if (size <= MAX_INLINE_INSNS_AUTO
	       || DECL_DECLARED_INLINE_P (callee->decl))
	res += 7 / ((int)speculative + 1);
    }
@@ -2601,7 +3024,7 @@ hint_time_bonus (ipa_hints hints)
 static inline int64_t
 incorporate_penalties (ipa_node_params *info, int64_t evaluation)
 {
-  if (info->node_within_scc)
+  if (info->node_within_scc && !info->node_is_self_scc)
     evaluation = (evaluation
		  * (100 - PARAM_VALUE (PARAM_IPA_CP_RECURSION_PENALTY))) / 100;
@@ -2628,7 +3051,7 @@ good_cloning_opportunity_p (struct cgrap
 
   gcc_assert (size_cost > 0);
 
-  struct ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (node);
   if (max_count > profile_count::zero ())
     {
      int factor = RDIV (count_sum.probability_in
@@ -2645,7 +3068,8 @@ good_cloning_opportunity_p (struct cgrap
	  count_sum.dump (dump_file);
	  fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
		   ", threshold: %i\n",
-		   info->node_within_scc ? ", scc" : "",
+		   info->node_within_scc
+		   ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		   info->node_calling_single_call ? ", single_call" : "",
		   evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
	}
@@ -2663,7 +3087,8 @@ good_cloning_opportunity_p (struct cgrap
		 "size: %i, freq_sum: %i%s%s) -> evaluation: "
		 "%" PRId64 ", threshold: %i\n",
		 time_benefit, size_cost, freq_sum,
-		 info->node_within_scc ? ", scc" : "",
+		 info->node_within_scc
+		 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
		 info->node_calling_single_call ? ", single_call" : "",
		 evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
 
@@ -2674,25 +3099,25 @@ good_cloning_opportunity_p (struct cgrap
 /* Return all context independent values from aggregate lattices in PLATS in a
    vector.  Return NULL if there are none.  */
 
-static vec<ipa_agg_jf_item, va_gc> *
-context_independent_aggregate_values (struct ipcp_param_lattices *plats)
+static vec<ipa_agg_value>
+context_independent_aggregate_values (class ipcp_param_lattices *plats)
 {
-  vec<ipa_agg_jf_item, va_gc> *res = NULL;
+  vec<ipa_agg_value> res = vNULL;
 
   if (plats->aggs_bottom
      || plats->aggs_contain_variable
      || plats->aggs_count == 0)
-    return NULL;
+    return vNULL;
 
   for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat;
       aglat = aglat->next)
    if (aglat->is_single_const ())
      {
-	struct ipa_agg_jf_item item;
+	struct ipa_agg_value item;
	item.offset = aglat->offset;
	item.value = aglat->values->value;
-	vec_safe_push (res, item);
+	res.safe_push (item);
      }
   return res;
 }
@@ -2704,11 +3129,11 @@ context_independent_aggregate_values (st
 it.
*/ static bool -gather_context_independent_values (struct ipa_node_params *info, +gather_context_independent_values (class ipa_node_params *info, vec *known_csts, vec *known_contexts, - vec *known_aggs, + vec *known_aggs, int *removable_params_cost) { int i, count = ipa_get_param_count (info); @@ -2729,7 +3154,7 @@ gather_context_independent_values (struc for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ipcp_lattice *lat = &plats->itself; if (lat->is_single_const ()) @@ -2758,40 +3183,20 @@ gather_context_independent_values (struc if (known_aggs) { - vec *agg_items; - struct ipa_agg_jump_function *ajf; + vec agg_items; + struct ipa_agg_value_set *agg; agg_items = context_independent_aggregate_values (plats); - ajf = &(*known_aggs)[i]; - ajf->items = agg_items; - ajf->by_ref = plats->aggs_by_ref; - ret |= agg_items != NULL; + agg = &(*known_aggs)[i]; + agg->items = agg_items; + agg->by_ref = plats->aggs_by_ref; + ret |= !agg_items.is_empty (); } } return ret; } -/* The current interface in ipa-inline-analysis requires a pointer vector. - Create it. - - FIXME: That interface should be re-worked, this is slightly silly. Still, - I'd like to discuss how to change it first and this demonstrates the - issue. */ - -static vec -agg_jmp_p_vec_for_t_vec (vec known_aggs) -{ - vec ret; - struct ipa_agg_jump_function *ajf; - int i; - - ret.create (known_aggs.length ()); - FOR_EACH_VEC_ELT (known_aggs, i, ajf) - ret.quick_push (ajf); - return ret; -} - /* Perform time and size measurement of NODE with the context given in KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of @@ -2801,7 +3206,7 @@ agg_jmp_p_vec_for_t_vec (vec known_csts, vec known_contexts, - vec known_aggs_ptrs, + vec known_aggs, int removable_params_cost, int est_move_cost, ipcp_value_base *val) { @@ -2810,7 +3215,7 @@ perform_estimation_of_a_value (cgraph_no ipa_hints hints; estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, - known_aggs_ptrs, &size, &time, + known_aggs, &size, &time, &base_time, &hints); base_time -= time; if (base_time > 65535) @@ -2824,7 +3229,7 @@ perform_estimation_of_a_value (cgraph_no else time_benefit = base_time.to_int () + devirtualization_time_bonus (node, known_csts, known_contexts, - known_aggs_ptrs) + known_aggs) + hint_time_bonus (hints) + removable_params_cost + est_move_cost; @@ -2846,12 +3251,11 @@ perform_estimation_of_a_value (cgraph_no static void estimate_local_effects (struct cgraph_node *node) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); vec known_csts; vec known_contexts; - vec known_aggs; - vec known_aggs_ptrs; + vec known_aggs; bool always_const; int removable_params_cost; @@ -2864,9 +3268,8 @@ estimate_local_effects (struct cgraph_no always_const = gather_context_independent_values (info, &known_csts, &known_contexts, &known_aggs, &removable_params_cost); - known_aggs_ptrs = agg_jmp_p_vec_for_t_vec (known_aggs); int devirt_bonus = devirtualization_time_bonus (node, known_csts, - known_contexts, known_aggs_ptrs); + known_contexts, known_aggs); if (always_const || devirt_bonus || (removable_params_cost && node->local.can_change_signature)) { @@ -2879,7 +3282,7 @@ estimate_local_effects (struct cgraph_no node->call_for_symbol_thunks_and_aliases 
(gather_caller_stats, &stats, false); estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, - known_aggs_ptrs, &size, &time, + known_aggs, &size, &time, &base_time, &hints); time -= devirt_bonus; time -= hint_time_bonus (hints); @@ -2926,7 +3329,7 @@ estimate_local_effects (struct cgraph_no for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ipcp_lattice *lat = &plats->itself; ipcp_value *val; @@ -2942,7 +3345,7 @@ estimate_local_effects (struct cgraph_no int emc = estimate_move_cost (TREE_TYPE (val->value), true); perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs_ptrs, + known_aggs, removable_params_cost, emc, val); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2960,7 +3363,7 @@ estimate_local_effects (struct cgraph_no for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); if (!plats->virt_call) continue; @@ -2977,7 +3380,7 @@ estimate_local_effects (struct cgraph_no { known_contexts[i] = val->value; perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs_ptrs, + known_aggs, removable_params_cost, 0, val); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2995,14 +3398,14 @@ estimate_local_effects (struct cgraph_no for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - struct ipa_agg_jump_function *ajf; + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + struct ipa_agg_value_set *agg; struct ipcp_agg_lattice *aglat; if (plats->aggs_bottom || !plats->aggs) continue; - ajf = &known_aggs[i]; + agg = &known_aggs[i]; for (aglat = plats->aggs; aglat; aglat = aglat->next) { ipcp_value *val; @@ -3014,14 +3417,14 @@ estimate_local_effects (struct cgraph_no for (val = aglat->values; val; val = val->next) { - struct ipa_agg_jf_item item; + struct ipa_agg_value item; item.offset = aglat->offset; item.value = val->value; - vec_safe_push (ajf->items, item); + agg->items.safe_push (item); perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs_ptrs, + known_aggs, removable_params_cost, 0, val); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -3037,18 +3440,14 @@ estimate_local_effects (struct cgraph_no val->local_time_benefit, val->local_size_cost); } - ajf->items->pop (); + agg->items.pop (); } } } - for (i = 0; i < count; i++) - vec_free (known_aggs[i].items); - known_csts.release (); known_contexts.release (); - known_aggs.release (); - known_aggs_ptrs.release (); + ipa_release_agg_values (known_aggs); } @@ -3112,12 +3511,12 @@ value_topo_info::add_val (ipcp_ static void add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ipcp_lattice *lat = &plats->itself; struct ipcp_agg_lattice *aglat; @@ -3152,7 +3551,7 @@ add_all_node_vals_to_toposort (cgraph_no connected components. 
 */

 static void
-propagate_constants_topo (struct ipa_topo_info *topo)
+propagate_constants_topo (class ipa_topo_info *topo)
 {
   int i;
 
@@ -3166,20 +3565,46 @@ propagate_constants_topo (struct ipa_top
 	 until all lattices stabilize.  */
       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
 	if (v->has_gimple_body_p ())
-	  push_node_to_stack (topo, v);
+	  {
+	    if (opt_for_fn (v->decl, flag_ipa_cp))
+	      push_node_to_stack (topo, v);
+	    /* When V is not optimized, we can not push it to stack, but
+	       still we need to set all its callees lattices to bottom.  */
+	    else
+	      {
+		for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
+		  propagate_constants_across_call (cs);
+	      }
+	  }
       v = pop_node_from_stack (topo);
       while (v)
 	{
 	  struct cgraph_edge *cs;
+	  class ipa_node_params *info = NULL;
+	  bool self_scc = true;
 
 	  for (cs = v->callees; cs; cs = cs->next_callee)
 	    if (ipa_edge_within_scc (cs))
 	      {
-		IPA_NODE_REF (v)->node_within_scc = true;
+		cgraph_node *callee = cs->callee->function_symbol ();
+
+		if (v != callee)
+		  self_scc = false;
+
+		if (!info)
+		  {
+		    info = IPA_NODE_REF (v);
+		    info->node_within_scc = true;
+		  }
+
 		if (propagate_constants_across_call (cs))
-		  push_node_to_stack (topo, cs->callee->function_symbol ());
+		  push_node_to_stack (topo, callee);
 	      }
+
+	  if (info)
+	    info->node_is_self_scc = self_scc;
+
 	  v = pop_node_from_stack (topo);
 	}
 
@@ -3187,7 +3612,8 @@ propagate_constants_topo (struct ipa_top
      the local effects of the discovered constants and all valid values to
      their topological sort.  */
   FOR_EACH_VEC_ELT (cycle_nodes, j, v)
-    if (v->has_gimple_body_p ())
+    if (v->has_gimple_body_p ()
+	&& opt_for_fn (v->decl, flag_ipa_cp))
       {
 	struct cgraph_edge *cs;
 
@@ -3255,7 +3681,7 @@ value_topo_info::propagate_effe
    summaries interprocedurally.  */
 
 static void
-ipcp_propagate_stage (struct ipa_topo_info *topo)
+ipcp_propagate_stage (class ipa_topo_info *topo)
 {
   struct cgraph_node *node;
 
@@ -3266,16 +3692,15 @@ ipcp_propagate_stage (struct ipa_topo_in
 
   FOR_EACH_DEFINED_FUNCTION (node)
     {
-      struct ipa_node_params *info = IPA_NODE_REF (node);
-
-      determine_versionability (node, info);
-      if (node->has_gimple_body_p ())
+      if (node->has_gimple_body_p () && opt_for_fn (node->decl, flag_ipa_cp))
 	{
-	  info->lattices = XCNEWVEC (struct ipcp_param_lattices,
+	  class ipa_node_params *info = IPA_NODE_REF (node);
+	  determine_versionability (node, info);
+	  info->lattices = XCNEWVEC (class ipcp_param_lattices,
 				     ipa_get_param_count (info));
 	  initialize_node_lattices (node);
 	}
-      ipa_fn_summary *s = ipa_fn_summaries->get (node);
+      ipa_size_summary *s = ipa_size_summaries->get (node);
       if (node->definition && !node->alias && s != NULL)
 	overall_size += s->self_size;
       max_count = max_count.max (node->count.ipa ());
@@ -3335,7 +3760,7 @@ ipcp_discover_new_direct_edges (struct c
 
       if (cs && !agg_contents && !polymorphic)
 	{
-	  struct ipa_node_params *info = IPA_NODE_REF (node);
+	  class ipa_node_params *info = IPA_NODE_REF (node);
 	  int c = ipa_get_controlled_uses (info, param_index);
 	  if (c != IPA_UNDESCRIBED_USE)
 	    {
@@ -3415,26 +3840,6 @@ edge_clone_summary_t::duplicate (cgraph_
   src_data->next_clone = dst_edge;
 }
 
-/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
-   parameter with the given INDEX.
*/ - -static tree -get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset, - int index) -{ - struct ipa_agg_replacement_value *aggval; - - aggval = ipa_get_agg_replacements_for_node (node); - while (aggval) - { - if (aggval->offset == offset - && aggval->index == index) - return aggval->value; - aggval = aggval->next; - } - return NULL_TREE; -} - /* Return true is NODE is DEST or its clone for all contexts. */ static bool @@ -3443,7 +3848,7 @@ same_node_or_its_all_contexts_clone_p (c if (node == dest) return true; - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); return info->is_all_contexts_clone && info->ipcp_orig_node == dest; } @@ -3454,12 +3859,12 @@ static bool cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source *src, cgraph_node *dest, ipcp_value *dest_val) { - struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); enum availability availability; cgraph_node *real_dest = cs->callee->function_symbol (&availability); - if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) - || availability <= AVAIL_INTERPOSABLE + if (availability <= AVAIL_INTERPOSABLE + || !same_node_or_its_all_contexts_clone_p (real_dest, dest) || caller_info->node_dead) return false; @@ -3485,7 +3890,7 @@ cgraph_edge_brings_value_p (cgraph_edge return true; struct ipcp_agg_lattice *aglat; - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, + class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, src->index); if (src->offset == -1) return (plats->itself.is_single_const () @@ -3514,10 +3919,12 @@ cgraph_edge_brings_value_p (cgraph_edge cgraph_node *dest, ipcp_value *) { - struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - cgraph_node *real_dest = cs->callee->function_symbol (); + class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + enum availability avail; + cgraph_node *real_dest = cs->callee->function_symbol (&avail); - if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) + if (avail <= AVAIL_INTERPOSABLE + || !same_node_or_its_all_contexts_clone_p (real_dest, dest) || caller_info->node_dead) return false; if (!src->val) @@ -3528,7 +3935,7 @@ cgraph_edge_brings_value_p (cgraph_edge && values_equal_for_ipcp_p (src->val->value, caller_info->known_contexts[src->index]); - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, + class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, src->index); return plats->ctxlat.is_single_const () && values_equal_for_ipcp_p (src->val->value, @@ -3575,6 +3982,9 @@ get_info_about_necessary_edges (ipcp_val hot |= cs->maybe_hot_p (); if (cs->caller != dest) non_self_recursive = true; + else if (src->val) + gcc_assert (values_equal_for_ipcp_p (src->val->value, + val->value)); } cs = get_next_cgraph_edge_clone (cs); } @@ -3588,6 +3998,19 @@ get_info_about_necessary_edges (ipcp_val *freq_sum = freq; *count_sum = cnt; *caller_count = count; + + if (!hot && IPA_NODE_REF (dest)->node_within_scc) + { + struct cgraph_edge *cs; + + /* Cold non-SCC source edge could trigger hot recursive execution of + function. Consider the case as hot and rely on following cost model + computation to further select right one. 
*/ + for (cs = dest->callers; cs; cs = cs->next_caller) + if (cs->caller == dest && cs->maybe_hot_p ()) + return true; + } + return hot; } @@ -3621,7 +4044,7 @@ gather_edges_for_value (ipcp_value callers) { - struct ipa_node_params *new_info, *info = IPA_NODE_REF (node); + class ipa_node_params *new_info, *info = IPA_NODE_REF (node); vec *replace_trees = NULL; struct ipa_agg_replacement_value *av; struct cgraph_node *new_node; @@ -3891,6 +4314,7 @@ create_specialized_node (struct cgraph_n update_profiling_info (node, new_node); new_info = IPA_NODE_REF (new_node); new_info->ipcp_orig_node = node; + new_node->ipcp_clone = true; new_info->known_csts = known_csts; new_info->known_contexts = known_contexts; @@ -3924,7 +4348,7 @@ find_more_scalar_values_for_callers_subs vec known_csts, vec callers) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); for (i = 0; i < count; i++) @@ -3946,7 +4370,8 @@ find_more_scalar_values_for_callers_subs if (IPA_NODE_REF (cs->caller)->node_dead) continue; - if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) + if (!IPA_EDGE_REF (cs) + || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) || (i == 0 && call_passes_through_thunk_p (cs))) { @@ -4015,7 +4440,8 @@ find_more_contexts_for_caller_subset (cg FOR_EACH_VEC_ELT (callers, j, cs) { - if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) + if (!IPA_EDGE_REF (cs) + || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) return; ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); @@ -4056,10 +4482,10 @@ find_more_contexts_for_caller_subset (cg /* Go through PLATS and create a vector of values consisting of values and offsets (minus OFFSET) of lattices that contain only a single value. */ -static vec -copy_plats_to_inter (struct ipcp_param_lattices *plats, HOST_WIDE_INT offset) +static vec +copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset) { - vec res = vNULL; + vec res = vNULL; if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) return vNULL; @@ -4067,7 +4493,7 @@ copy_plats_to_inter (struct ipcp_param_l for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) if (aglat->is_single_const ()) { - struct ipa_agg_jf_item ti; + struct ipa_agg_value ti; ti.offset = aglat->offset - offset; ti.value = aglat->values->value; res.safe_push (ti); @@ -4079,12 +4505,12 @@ copy_plats_to_inter (struct ipcp_param_l subtracting OFFSET). */ static void -intersect_with_plats (struct ipcp_param_lattices *plats, - vec *inter, +intersect_with_plats (class ipcp_param_lattices *plats, + vec *inter, HOST_WIDE_INT offset) { struct ipcp_agg_lattice *aglat; - struct ipa_agg_jf_item *item; + struct ipa_agg_value *item; int k; if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) @@ -4122,18 +4548,18 @@ intersect_with_plats (struct ipcp_param_ /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the vector result while subtracting OFFSET from the individual value offsets. 
*/ -static vec +static vec agg_replacements_to_vector (struct cgraph_node *node, int index, HOST_WIDE_INT offset) { struct ipa_agg_replacement_value *av; - vec res = vNULL; + vec res = vNULL; for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next) if (av->index == index && (av->offset - offset) >= 0) { - struct ipa_agg_jf_item item; + struct ipa_agg_value item; gcc_checking_assert (av->value); item.offset = av->offset - offset; item.value = av->value; @@ -4149,11 +4575,11 @@ agg_replacements_to_vector (struct cgrap static void intersect_with_agg_replacements (struct cgraph_node *node, int index, - vec *inter, + vec *inter, HOST_WIDE_INT offset) { struct ipa_agg_replacement_value *srcvals; - struct ipa_agg_jf_item *item; + struct ipa_agg_value *item; int i; srcvals = ipa_get_agg_replacements_for_node (node); @@ -4190,22 +4616,22 @@ intersect_with_agg_replacements (struct copy all incoming values to it. If we determine we ended up with no values whatsoever, return a released vector. */ -static vec +static vec intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, - vec inter) + vec inter) { struct ipa_jump_func *jfunc; jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index); if (jfunc->type == IPA_JF_PASS_THROUGH && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) { - struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); if (caller_info->ipcp_orig_node) { struct cgraph_node *orig_node = caller_info->ipcp_orig_node; - struct ipcp_param_lattices *orig_plats; + class ipcp_param_lattices *orig_plats; orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node), src_idx); if (agg_pass_through_permissible_p (orig_plats, jfunc)) @@ -4224,7 +4650,7 @@ intersect_aggregates_with_edge (struct c } else { - struct ipcp_param_lattices *src_plats; + class ipcp_param_lattices *src_plats; src_plats = ipa_get_parm_lattices (caller_info, src_idx); if (agg_pass_through_permissible_p (src_plats, jfunc)) { @@ -4246,9 +4672,9 @@ intersect_aggregates_with_edge (struct c else if (jfunc->type == IPA_JF_ANCESTOR && ipa_get_jf_ancestor_agg_preserved (jfunc)) { - struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); - struct ipcp_param_lattices *src_plats; + class ipcp_param_lattices *src_plats; HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc); if (caller_info->ipcp_orig_node) @@ -4273,12 +4699,26 @@ intersect_aggregates_with_edge (struct c } else if (jfunc->agg.items) { - struct ipa_agg_jf_item *item; + class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + struct ipa_agg_value *item; int k; if (!inter.exists ()) for (unsigned i = 0; i < jfunc->agg.items->length (); i++) - inter.safe_push ((*jfunc->agg.items)[i]); + { + struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i]; + tree value = ipa_agg_value_from_node (caller_info, cs->caller, + agg_item); + if (value) + { + struct ipa_agg_value agg_value; + + agg_value.offset = agg_item->offset; + agg_value.value = value; + + inter.safe_push (agg_value); + } + } else FOR_EACH_VEC_ELT (inter, k, item) { @@ -4296,9 +4736,10 @@ intersect_aggregates_with_edge (struct c break; if (ti->offset == item->offset) { - gcc_checking_assert (ti->value); - if (values_equal_for_ipcp_p (item->value, - ti->value)) + tree value = ipa_agg_value_from_node (caller_info, + 
cs->caller, ti); + if (value + && values_equal_for_ipcp_p (item->value, value)) found = true; break; } @@ -4311,7 +4752,7 @@ intersect_aggregates_with_edge (struct c else { inter.release (); - return vec(); + return vNULL; } return inter; } @@ -4323,7 +4764,7 @@ static struct ipa_agg_replacement_value find_aggregate_values_for_callers_subset (struct cgraph_node *node, vec callers) { - struct ipa_node_params *dest_info = IPA_NODE_REF (node); + class ipa_node_params *dest_info = IPA_NODE_REF (node); struct ipa_agg_replacement_value *res; struct ipa_agg_replacement_value **tail = &res; struct cgraph_edge *cs; @@ -4331,6 +4772,11 @@ find_aggregate_values_for_callers_subset FOR_EACH_VEC_ELT (callers, j, cs) { + if (!IPA_EDGE_REF (cs)) + { + count = 0; + break; + } int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); if (c < count) count = c; @@ -4339,9 +4785,9 @@ find_aggregate_values_for_callers_subset for (i = 0; i < count; i++) { struct cgraph_edge *cs; - vec inter = vNULL; - struct ipa_agg_jf_item *item; - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); + vec inter = vNULL; + struct ipa_agg_value *item; + class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); int j; /* Among other things, the following check should deal with all by_ref @@ -4394,10 +4840,10 @@ static bool cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, struct cgraph_node *node) { - struct ipa_node_params *dest_info = IPA_NODE_REF (node); + class ipa_node_params *dest_info = IPA_NODE_REF (node); int count = ipa_get_param_count (dest_info); - struct ipa_node_params *caller_info; - struct ipa_edge_args *args; + class ipa_node_params *caller_info; + class ipa_edge_args *args; int i; caller_info = IPA_NODE_REF (cs->caller); @@ -4428,8 +4874,7 @@ static bool cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, struct cgraph_node *node) { - struct ipa_node_params *orig_caller_info = IPA_NODE_REF (cs->caller); - struct ipa_node_params *orig_node_info; + class ipa_node_params *orig_node_info; struct ipa_agg_replacement_value *aggval; int i, ec, count; @@ -4445,12 +4890,10 @@ cgraph_edge_brings_all_agg_vals_for_node return false; orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node); - if (orig_caller_info->ipcp_orig_node) - orig_caller_info = IPA_NODE_REF (orig_caller_info->ipcp_orig_node); for (i = 0; i < count; i++) { - struct ipcp_param_lattices *plats; + class ipcp_param_lattices *plats; bool interesting = false; for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) if (aggval->index == i) @@ -4465,15 +4908,14 @@ cgraph_edge_brings_all_agg_vals_for_node if (plats->aggs_bottom) return false; - vec values - = intersect_aggregates_with_edge (cs, i, vNULL); + vec values = intersect_aggregates_with_edge (cs, i, vNULL); if (!values.exists ()) return false; for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) if (aggval->index == i) { - struct ipa_agg_jf_item *item; + struct ipa_agg_value *item; int j; bool found = false; FOR_EACH_VEC_ELT (values, j, item) @@ -4708,11 +5150,10 @@ decide_about_value (struct cgraph_node * static bool decide_whether_version_node (struct cgraph_node *node) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); vec known_csts; vec known_contexts; - vec known_aggs = vNULL; bool ret = false; if (count == 0) @@ -4723,12 +5164,11 @@ decide_whether_version_node (struct cgra node->dump_name ()); 
gather_context_independent_values (info, &known_csts, &known_contexts, - info->do_clone_for_all_contexts ? &known_aggs - : NULL, NULL); + NULL, NULL); for (i = 0; i < count;i++) { - struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ipcp_lattice *lat = &plats->itself; ipcp_lattice *ctxlat = &plats->ctxlat; @@ -4793,9 +5233,6 @@ decide_whether_version_node (struct cgra info = IPA_NODE_REF (node); info->do_clone_for_all_contexts = false; IPA_NODE_REF (clone)->is_all_contexts_clone = true; - for (i = 0; i < count; i++) - vec_free (known_aggs[i].items); - known_aggs.release (); ret = true; } else @@ -4818,7 +5255,7 @@ spread_undeadness (struct cgraph_node *n if (ipa_edge_within_scc (cs)) { struct cgraph_node *callee; - struct ipa_node_params *info; + class ipa_node_params *info; callee = cs->callee->function_symbol (NULL); info = IPA_NODE_REF (callee); @@ -4881,7 +5318,7 @@ identify_dead_nodes (struct cgraph_node TOPO and make specialized clones if deemed beneficial. */ static void -ipcp_decision_stage (struct ipa_topo_info *topo) +ipcp_decision_stage (class ipa_topo_info *topo) { int i; @@ -4923,7 +5360,7 @@ ipcp_store_bits_results (void) bool dumped_sth = false; bool found_useful_result = false; - if (!opt_for_fn (node->decl, flag_ipa_bit_cp)) + if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info) { if (dump_file) fprintf (dump_file, "Not considering %s for ipa bitwise propagation " @@ -5055,7 +5492,7 @@ ipcp_store_vr_results (void) static unsigned int ipcp_driver (void) { - struct ipa_topo_info topo; + class ipa_topo_info topo; if (edge_clone_summaries == NULL) edge_clone_summaries = new edge_clone_summary_t (symtab); diff -Nurp a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c --- a/gcc/ipa-devirt.c 2020-04-30 15:14:04.624000000 +0800 +++ b/gcc/ipa-devirt.c 2020-04-30 15:14:56.624000000 +0800 @@ -172,6 +172,11 @@ struct default_hash_traits } }; +/* HACK alert: this is used to communicate with ipa-inline-transform that + thunk is being expanded and there is no need to clear the polymorphic + call target cache. */ +bool thunk_expansion; + static bool odr_types_equivalent_p (tree, tree, bool, bool *, hash_set *, location_t, location_t); @@ -2557,7 +2562,7 @@ maybe_record_node (vec & || target_node->definition) && target_node->real_symbol_p ()) { - gcc_assert (!target_node->global.inlined_to); + gcc_assert (!target_node->inlined_to); gcc_assert (target_node->real_symbol_p ()); /* When sanitizing, do not assume that __cxa_pure_virtual is not called by valid program. */ @@ -2892,6 +2897,7 @@ static void devirt_node_removal_hook (struct cgraph_node *n, void *d ATTRIBUTE_UNUSED) { if (cached_polymorphic_call_targets + && !thunk_expansion && cached_polymorphic_call_targets->contains (n)) free_polymorphic_call_targets_hash (); } diff -Nurp a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c --- a/gcc/ipa-fnsummary.c 2020-04-30 15:14:04.568000000 +0800 +++ b/gcc/ipa-fnsummary.c 2020-04-30 15:14:56.664000000 +0800 @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. /* Summaries. */ fast_function_summary *ipa_fn_summaries; +fast_function_summary *ipa_size_summaries; fast_call_summary *ipa_call_summaries; /* Edge predicates goes here. 
*/ @@ -207,7 +208,7 @@ ipa_fn_summary::account_size_time (int s } if (!found) { - struct size_time_entry new_entry; + class size_time_entry new_entry; new_entry.size = size; new_entry.time = time; new_entry.exec_predicate = exec_pred; @@ -236,7 +237,7 @@ redirect_to_unreachable (struct cgraph_e e->make_direct (target); else e->redirect_callee (target); - struct ipa_call_summary *es = ipa_call_summaries->get (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); e->inline_failed = CIF_UNREACHABLE; e->count = profile_count::zero (); es->call_stmt_size = 0; @@ -261,7 +262,7 @@ edge_set_predicate (struct cgraph_edge * && (!e->speculative || e->callee)) e = redirect_to_unreachable (e); - struct ipa_call_summary *es = ipa_call_summaries->get (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); if (predicate && *predicate != true) { if (!es->predicate) @@ -306,9 +307,9 @@ set_hint_predicate (predicate **p, predi the fact that parameter is indeed a constant. KNOWN_VALS is partial mapping of parameters of NODE to constant values. - KNOWN_AGGS is a vector of aggreggate jump functions for each parameter. - Return clause of possible truths. When INLINE_P is true, assume that we are - inlining. + KNOWN_AGGS is a vector of aggreggate known offset/value set for each + parameter. Return clause of possible truths. When INLINE_P is true, assume + that we are inlining. ERROR_MARK means compile time invariant. */ @@ -316,14 +317,13 @@ static void evaluate_conditions_for_known_args (struct cgraph_node *node, bool inline_p, vec known_vals, - vec - known_aggs, + vec known_aggs, clause_t *ret_clause, clause_t *ret_nonspec_clause) { clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; clause_t nonspec_clause = 1 << predicate::not_inlined_condition; - struct ipa_fn_summary *info = ipa_fn_summaries->get (node); + class ipa_fn_summary *info = ipa_fn_summaries->get (node); int i; struct condition *c; @@ -331,6 +331,8 @@ evaluate_conditions_for_known_args (stru { tree val; tree res; + int j; + struct expr_eval_op *op; /* We allow call stmt to have fewer arguments than the callee function (especially for K&R style programs). So bound check here (we assume @@ -347,7 +349,7 @@ evaluate_conditions_for_known_args (stru if (c->agg_contents) { - struct ipa_agg_jump_function *agg; + struct ipa_agg_value_set *agg; if (c->code == predicate::changed && !c->by_ref @@ -356,7 +358,7 @@ evaluate_conditions_for_known_args (stru if (known_aggs.exists ()) { - agg = known_aggs[c->operand_num]; + agg = &known_aggs[c->operand_num]; val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num], c->offset, c->by_ref); } @@ -382,7 +384,7 @@ evaluate_conditions_for_known_args (stru continue; } - if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size) + if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val))) { clause |= 1 << (i + predicate::first_dynamic_condition); nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); @@ -394,7 +396,30 @@ evaluate_conditions_for_known_args (stru continue; } - val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val); + val = fold_unary (VIEW_CONVERT_EXPR, c->type, val); + for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) + { + if (!val) + break; + if (!op->val[0]) + val = fold_unary (op->code, op->type, val); + else if (!op->val[1]) + val = fold_binary (op->code, op->type, + op->index ? op->val[0] : val, + op->index ? 
val : op->val[0]); + else if (op->index == 0) + val = fold_ternary (op->code, op->type, + val, op->val[0], op->val[1]); + else if (op->index == 1) + val = fold_ternary (op->code, op->type, + op->val[0], val, op->val[1]); + else if (op->index == 2) + val = fold_ternary (op->code, op->type, + op->val[0], op->val[1], val); + else + val = NULL_TREE; + } + res = val ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) : NULL; @@ -420,12 +445,13 @@ evaluate_properties_for_edge (struct cgr vec *known_vals_ptr, vec *known_contexts_ptr, - vec *known_aggs_ptr) + vec *known_aggs_ptr) { struct cgraph_node *callee = e->callee->ultimate_alias_target (); - struct ipa_fn_summary *info = ipa_fn_summaries->get (callee); + class ipa_fn_summary *info = ipa_fn_summaries->get (callee); vec known_vals = vNULL; - vec known_aggs = vNULL; + vec known_aggs = vNULL; + class ipa_edge_args *args; if (clause_ptr) *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition; @@ -436,18 +462,20 @@ evaluate_properties_for_edge (struct cgr if (ipa_node_params_sum && !e->call_stmt_cannot_inline_p - && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)) + && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr) + && (args = IPA_EDGE_REF (e)) != NULL) { - struct ipa_node_params *caller_parms_info, *callee_pi; - struct ipa_edge_args *args = IPA_EDGE_REF (e); - struct ipa_call_summary *es = ipa_call_summaries->get (e); + struct cgraph_node *caller; + class ipa_node_params *caller_parms_info, *callee_pi; + class ipa_call_summary *es = ipa_call_summaries->get (e); int i, count = ipa_get_cs_argument_count (args); - if (e->caller->global.inlined_to) - caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to); + if (e->caller->inlined_to) + caller = e->caller->inlined_to; else - caller_parms_info = IPA_NODE_REF (e->caller); - callee_pi = IPA_NODE_REF (e->callee); + caller = e->caller; + caller_parms_info = IPA_NODE_REF (caller); + callee_pi = IPA_NODE_REF (callee); if (count && (info->conds || known_vals_ptr)) known_vals.safe_grow_cleared (count); @@ -456,36 +484,38 @@ evaluate_properties_for_edge (struct cgr if (count && known_contexts_ptr) known_contexts_ptr->safe_grow_cleared (count); - for (i = 0; i < count; i++) - { - struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); - tree cst = ipa_value_from_jfunc (caller_parms_info, jf, - ipa_get_type (callee_pi, i)); - - if (!cst && e->call_stmt - && i < (int)gimple_call_num_args (e->call_stmt)) - { - cst = gimple_call_arg (e->call_stmt, i); - if (!is_gimple_min_invariant (cst)) - cst = NULL; - } - if (cst) - { - gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); - if (known_vals.exists ()) - known_vals[i] = cst; - } - else if (inline_p && !es->param[i].change_prob) - known_vals[i] = error_mark_node; - - if (known_contexts_ptr) - (*known_contexts_ptr)[i] - = ipa_context_from_jfunc (caller_parms_info, e, i, jf); - /* TODO: When IPA-CP starts propagating and merging aggregate jump - functions, use its knowledge of the caller too, just like the - scalar case above. 
*/ - known_aggs[i] = &jf->agg; - } + if (callee_pi) + for (i = 0; i < count; i++) + { + struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); + tree cst = ipa_value_from_jfunc (caller_parms_info, jf, + ipa_get_type (callee_pi, i)); + + if (!cst && e->call_stmt + && i < (int)gimple_call_num_args (e->call_stmt)) + { + cst = gimple_call_arg (e->call_stmt, i); + if (!is_gimple_min_invariant (cst)) + cst = NULL; + } + if (cst) + { + gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); + if (known_vals.exists ()) + known_vals[i] = cst; + } + else if (inline_p && !es->param[i].change_prob) + known_vals[i] = error_mark_node; + + if (known_contexts_ptr) + (*known_contexts_ptr)[i] + = ipa_context_from_jfunc (caller_parms_info, e, i, jf); + + known_aggs[i] = ipa_agg_value_set_from_jfunc (caller_parms_info, + caller, &jf->agg); + } + else + gcc_assert (callee->thunk.thunk_p); } else if (e->call_stmt && !e->call_stmt_cannot_inline_p && ((clause_ptr && info->conds) || known_vals_ptr)) @@ -516,7 +546,7 @@ evaluate_properties_for_edge (struct cgr if (known_aggs_ptr) *known_aggs_ptr = known_aggs; else - known_aggs.release (); + ipa_release_agg_values (known_aggs); } @@ -527,6 +557,8 @@ ipa_fn_summary_alloc (void) { gcc_checking_assert (!ipa_fn_summaries); ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); + ipa_size_summaries = new fast_function_summary + (symtab); ipa_call_summaries = new ipa_call_summary_t (symtab); } @@ -597,7 +629,7 @@ ipa_fn_summary_t::duplicate (cgraph_node { vec *entry = info->size_time_table; /* Use SRC parm info since it may not be copied yet. */ - struct ipa_node_params *parms_info = IPA_NODE_REF (src); + class ipa_node_params *parms_info = IPA_NODE_REF (src); vec known_vals = vNULL; int count = ipa_get_param_count (parms_info); int i, j; @@ -661,7 +693,7 @@ ipa_fn_summary_t::duplicate (cgraph_node for (edge = dst->callees; edge; edge = next) { predicate new_predicate; - struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); + class ipa_call_summary *es = ipa_call_summaries->get_create (edge); next = edge->next_callee; if (!edge->inline_failed) @@ -680,7 +712,7 @@ ipa_fn_summary_t::duplicate (cgraph_node for (edge = dst->indirect_calls; edge; edge = next) { predicate new_predicate; - struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); + class ipa_call_summary *es = ipa_call_summaries->get_create (edge); next = edge->next_callee; gcc_checking_assert (edge->inline_failed); @@ -719,7 +751,7 @@ ipa_fn_summary_t::duplicate (cgraph_node set_hint_predicate (&info->loop_stride, p); } } - if (!dst->global.inlined_to) + if (!dst->inlined_to) ipa_update_overall_fn_summary (dst); } @@ -729,8 +761,8 @@ ipa_fn_summary_t::duplicate (cgraph_node void ipa_call_summary_t::duplicate (struct cgraph_edge *src, struct cgraph_edge *dst, - struct ipa_call_summary *srcinfo, - struct ipa_call_summary *info) + class ipa_call_summary *srcinfo, + class ipa_call_summary *info) { new (info) ipa_call_summary (*srcinfo); info->predicate = NULL; @@ -750,12 +782,12 @@ ipa_call_summary_t::duplicate (struct cg static void dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, - struct ipa_fn_summary *info) + class ipa_fn_summary *info) { struct cgraph_edge *edge; for (edge = node->callees; edge; edge = edge->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + class ipa_call_summary *es = ipa_call_summaries->get (edge); struct cgraph_node *callee = edge->callee->ultimate_alias_target (); int i; @@ -768,9 +800,10 @@ 
dump_ipa_call_summary (FILE *f, int inde es->call_stmt_size, es->call_stmt_time); ipa_fn_summary *s = ipa_fn_summaries->get (callee); + ipa_size_summary *ss = ipa_size_summaries->get (callee); if (s != NULL) - fprintf (f, "callee size:%2i stack:%2i", - (int) (s->size / ipa_fn_summary::size_scale), + fprintf (f, " callee size:%2i stack:%2i", + (int) (ss->size / ipa_fn_summary::size_scale), (int) s->estimated_stack_size); if (es->predicate) @@ -794,19 +827,17 @@ dump_ipa_call_summary (FILE *f, int inde } if (!edge->inline_failed) { - ipa_fn_summary *s = ipa_fn_summaries->get (callee); - fprintf (f, "%*sStack frame offset %i, callee self size %i," - " callee size %i\n", + ipa_size_summary *ss = ipa_size_summaries->get (callee); + fprintf (f, "%*sStack frame offset %i, callee self size %i\n", indent + 2, "", - (int) s->stack_frame_offset, - (int) s->estimated_self_stack_size, - (int) s->estimated_stack_size); + (int) ipa_get_stack_frame_offset (callee), + (int) ss->estimated_self_stack_size); dump_ipa_call_summary (f, indent + 2, callee, info); } } for (edge = node->indirect_calls; edge; edge = edge->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + class ipa_call_summary *es = ipa_call_summaries->get (edge); fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" " time: %2i", indent, "", @@ -829,7 +860,8 @@ ipa_dump_fn_summary (FILE *f, struct cgr { if (node->definition) { - struct ipa_fn_summary *s = ipa_fn_summaries->get (node); + class ipa_fn_summary *s = ipa_fn_summaries->get (node); + class ipa_size_summary *ss = ipa_size_summaries->get (node); if (s != NULL) { size_time_entry *e; @@ -842,11 +874,11 @@ ipa_dump_fn_summary (FILE *f, struct cgr if (s->fp_expressions) fprintf (f, " fp_expression"); fprintf (f, "\n global time: %f\n", s->time.to_double ()); - fprintf (f, " self size: %i\n", s->self_size); - fprintf (f, " global size: %i\n", s->size); + fprintf (f, " self size: %i\n", ss->self_size); + fprintf (f, " global size: %i\n", ss->size); fprintf (f, " min size: %i\n", s->min_size); fprintf (f, " self stack: %i\n", - (int) s->estimated_self_stack_size); + (int) ss->estimated_self_stack_size); fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); if (s->growth) fprintf (f, " estimated growth:%i\n", (int) s->growth); @@ -900,7 +932,7 @@ ipa_dump_fn_summaries (FILE *f) struct cgraph_node *node; FOR_EACH_DEFINED_FUNCTION (node) - if (!node->global.inlined_to) + if (!node->inlined_to) ipa_dump_fn_summary (f, node); } @@ -922,7 +954,7 @@ mark_modified (ao_ref *ao ATTRIBUTE_UNUS static tree unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op, - HOST_WIDE_INT *size_p) + poly_int64 *size_p) { /* SSA_NAME referring to parm default def? */ if (TREE_CODE (op) == SSA_NAME @@ -930,7 +962,7 @@ unmodified_parm_1 (ipa_func_body_info *f && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) { if (size_p) - *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); + *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); return SSA_NAME_VAR (op); } /* Non-SSA parm reference? 
*/ @@ -951,7 +983,7 @@ unmodified_parm_1 (ipa_func_body_info *f if (!modified) { if (size_p) - *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); + *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); return op; } } @@ -965,7 +997,7 @@ unmodified_parm_1 (ipa_func_body_info *f static tree unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op, - HOST_WIDE_INT *size_p) + poly_int64 *size_p) { tree res = unmodified_parm_1 (fbi, stmt, op, size_p); if (res) @@ -990,7 +1022,7 @@ unmodified_parm (ipa_func_body_info *fbi static bool unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, gimple *stmt, tree op, int *index_p, - HOST_WIDE_INT *size_p, + poly_int64 *size_p, struct agg_position_info *aggpos) { tree res = unmodified_parm_1 (fbi, stmt, op, size_p); @@ -1157,25 +1189,147 @@ eliminated_by_inlining_prob (ipa_func_bo } } +/* Analyze EXPR if it represents a series of simple operations performed on + a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and + AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item. + Type of the parameter or load from an aggregate via the parameter is + stored in *TYPE_P. Operations on the parameter are recorded to + PARAM_OPS_P if it is not NULL. */ + +static bool +decompose_param_expr (struct ipa_func_body_info *fbi, + gimple *stmt, tree expr, + int *index_p, tree *type_p, + struct agg_position_info *aggpos, + expr_eval_ops *param_ops_p = NULL) +{ + int op_limit = PARAM_VALUE (PARAM_IPA_MAX_PARAM_EXPR_OPS); + int op_count = 0; + + if (param_ops_p) + *param_ops_p = NULL; + + while (true) + { + expr_eval_op eval_op; + unsigned rhs_count; + unsigned cst_count = 0; + + if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL, + aggpos)) + { + tree type = TREE_TYPE (expr); + + if (aggpos->agg_contents) + { + /* Stop if containing bit-field. */ + if (TREE_CODE (expr) == BIT_FIELD_REF + || contains_bitfld_component_ref_p (expr)) + break; + } + + *type_p = type; + return true; + } + + if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr)) + break; + + if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr))) + break; + + switch (gimple_assign_rhs_class (stmt)) + { + case GIMPLE_SINGLE_RHS: + expr = gimple_assign_rhs1 (stmt); + continue; + + case GIMPLE_UNARY_RHS: + rhs_count = 1; + break; + + case GIMPLE_BINARY_RHS: + rhs_count = 2; + break; + + case GIMPLE_TERNARY_RHS: + rhs_count = 3; + break; + + default: + goto fail; + } + + /* Stop if expression is too complex. */ + if (op_count++ == op_limit) + break; + + if (param_ops_p) + { + eval_op.code = gimple_assign_rhs_code (stmt); + eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt)); + eval_op.val[0] = NULL_TREE; + eval_op.val[1] = NULL_TREE; + } + + expr = NULL_TREE; + for (unsigned i = 0; i < rhs_count; i++) + { + tree op = gimple_op (stmt, i + 1); + + gcc_assert (op && !TYPE_P (op)); + if (is_gimple_ip_invariant (op)) + { + if (++cst_count == rhs_count) + goto fail; + + eval_op.val[cst_count - 1] = op; + } + else if (!expr) + { + /* Found a non-constant operand, and record its index in rhs + operands. */ + eval_op.index = i; + expr = op; + } + else + { + /* Found more than one non-constant operands. */ + goto fail; + } + } + + if (param_ops_p) + vec_safe_insert (*param_ops_p, 0, eval_op); + } + + /* Failed to decompose, free resource and return. */ +fail: + if (param_ops_p) + vec_free (*param_ops_p); + + return false; +} /* If BB ends by a conditional we can turn into predicates, attach corresponding predicates to the CFG edges. 
*/ static void set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, - struct ipa_fn_summary *summary, + class ipa_fn_summary *summary, + class ipa_node_params *params_summary, basic_block bb) { gimple *last; - tree op; + tree op, op2; int index; - HOST_WIDE_INT size; struct agg_position_info aggpos; enum tree_code code, inverted_code; edge e; edge_iterator ei; gimple *set_stmt; - tree op2; + tree param_type; + expr_eval_ops param_ops; last = last_stmt (bb); if (!last || gimple_code (last) != GIMPLE_COND) @@ -1183,10 +1337,9 @@ set_cond_stmt_execution_predicate (struc if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) return; op = gimple_cond_lhs (last); - /* TODO: handle conditionals like - var = op0 < 4; - if (var != 0). */ - if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) + + if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, + ¶m_ops)) { code = gimple_cond_code (last); inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); @@ -1197,17 +1350,24 @@ set_cond_stmt_execution_predicate (struc ? code : inverted_code); /* invert_tree_comparison will return ERROR_MARK on FP comparsions that are not EQ/NE instead of returning proper - unordered one. Be sure it is not confused with NON_CONSTANT. */ - if (this_code != ERROR_MARK) + unordered one. Be sure it is not confused with NON_CONSTANT. + + And if the edge's target is the final block of diamond CFG graph + of this conditional statement, we do not need to compute + predicate for the edge because the final block's predicate must + be at least as that of the first block of the statement. */ + if (this_code != ERROR_MARK + && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) { predicate p - = add_condition (summary, index, size, &aggpos, this_code, - unshare_expr_without_location - (gimple_cond_rhs (last))); + = add_condition (summary, params_summary, index, + param_type, &aggpos, + this_code, gimple_cond_rhs (last), param_ops); e->aux = edge_predicate_pool.allocate (); *(predicate *) e->aux = p; } } + vec_free (param_ops); } if (TREE_CODE (op) != SSA_NAME) @@ -1230,12 +1390,12 @@ set_cond_stmt_execution_predicate (struc || gimple_call_num_args (set_stmt) != 1) return; op2 = gimple_call_arg (set_stmt, 0); - if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size, - &aggpos)) + if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos)) return; FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) { - predicate p = add_condition (summary, index, size, &aggpos, + predicate p = add_condition (summary, params_summary, index, + param_type, &aggpos, predicate::is_not_constant, NULL_TREE); e->aux = edge_predicate_pool.allocate (); *(predicate *) e->aux = p; @@ -1248,63 +1408,200 @@ set_cond_stmt_execution_predicate (struc static void set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, - struct ipa_fn_summary *summary, + class ipa_fn_summary *summary, + class ipa_node_params *params_summary, basic_block bb) { gimple *lastg; tree op; int index; - HOST_WIDE_INT size; struct agg_position_info aggpos; edge e; edge_iterator ei; size_t n; size_t case_idx; + tree param_type; + expr_eval_ops param_ops; lastg = last_stmt (bb); if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) return; gswitch *last = as_a (lastg); op = gimple_switch_index (last); - if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) + if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, + ¶m_ops)) return; + auto_vec > 
ranges; + tree type = TREE_TYPE (op); + int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS); + int bound_count = 0; + wide_int vr_wmin, vr_wmax; + value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax); + FOR_EACH_EDGE (e, ei, bb->succs) { e->aux = edge_predicate_pool.allocate (); *(predicate *) e->aux = false; } + + e = gimple_switch_edge (cfun, last, 0); + /* Set BOUND_COUNT to maximum count to bypass computing predicate for + default case if its target basic block is in convergence point of all + switch cases, which can be determined by checking whether it + post-dominates the switch statement. */ + if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) + bound_count = INT_MAX; + n = gimple_switch_num_labels (last); - for (case_idx = 0; case_idx < n; ++case_idx) + for (case_idx = 1; case_idx < n; ++case_idx) { tree cl = gimple_switch_label (last, case_idx); - tree min, max; + tree min = CASE_LOW (cl); + tree max = CASE_HIGH (cl); predicate p; e = gimple_switch_edge (cfun, last, case_idx); - min = CASE_LOW (cl); - max = CASE_HIGH (cl); - /* For default we might want to construct predicate that none - of cases is met, but it is bit hard to do not having negations - of conditionals handy. */ - if (!min && !max) + /* The case value might not have same type as switch expression, + extend the value based on the expression type. */ + if (TREE_TYPE (min) != type) + min = wide_int_to_tree (type, wi::to_wide (min)); + + if (!max) + max = min; + else if (TREE_TYPE (max) != type) + max = wide_int_to_tree (type, wi::to_wide (max)); + + /* The case's target basic block is in convergence point of all switch + cases, its predicate should be at least as that of the switch + statement. */ + if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) p = true; - else if (!max) - p = add_condition (summary, index, size, &aggpos, EQ_EXPR, - unshare_expr_without_location (min)); + else if (min == max) + p = add_condition (summary, params_summary, index, param_type, + &aggpos, EQ_EXPR, min, param_ops); else { predicate p1, p2; - p1 = add_condition (summary, index, size, &aggpos, GE_EXPR, - unshare_expr_without_location (min)); - p2 = add_condition (summary, index, size, &aggpos, LE_EXPR, - unshare_expr_without_location (max)); + p1 = add_condition (summary, params_summary, index, param_type, + &aggpos, GE_EXPR, min, param_ops); + p2 = add_condition (summary, params_summary,index, param_type, + &aggpos, LE_EXPR, max, param_ops); p = p1 & p2; } - *(struct predicate *) e->aux - = p.or_with (summary->conds, *(struct predicate *) e->aux); + *(class predicate *) e->aux + = p.or_with (summary->conds, *(class predicate *) e->aux); + + /* If there are too many disjoint case ranges, predicate for default + case might become too complicated. So add a limit here. */ + if (bound_count > bound_limit) + continue; + + bool new_range = true; + + if (!ranges.is_empty ()) + { + wide_int curr_wmin = wi::to_wide (min); + wide_int last_wmax = wi::to_wide (ranges.last ().second); + + /* Merge case ranges if they are continuous. */ + if (curr_wmin == last_wmax + 1) + new_range = false; + else if (vr_type == VR_ANTI_RANGE) + { + /* If two disjoint case ranges can be connected by anti-range + of switch index, combine them to one range. */ + if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type))) + vr_type = VR_UNDEFINED; + else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type))) + new_range = false; + } + } + + /* Create/extend a case range. 
And we count endpoints of range set, + this number nearly equals to number of conditions that we will create + for predicate of default case. */ + if (new_range) + { + bound_count += (min == max) ? 1 : 2; + ranges.safe_push (std::make_pair (min, max)); + } + else + { + bound_count += (ranges.last ().first == ranges.last ().second); + ranges.last ().second = max; + } + } + + e = gimple_switch_edge (cfun, last, 0); + if (bound_count > bound_limit) + { + *(class predicate *) e->aux = true; + vec_free (param_ops); + return; + } + + predicate p_seg = true; + predicate p_all = false; + + if (vr_type != VR_RANGE) + { + vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type)); + vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type)); } + + /* Construct predicate to represent default range set that is negation of + all case ranges. Case range is classified as containing single/non-single + values. Suppose a piece of case ranges in the following. + + [D1...D2] [S1] ... [Sn] [D3...D4] + + To represent default case's range sets between two non-single value + case ranges (From D2 to D3), we construct predicate as: + + D2 < x < D3 && x != S1 && ... && x != Sn + */ + for (size_t i = 0; i < ranges.length (); i++) + { + tree min = ranges[i].first; + tree max = ranges[i].second; + + if (min == max) + p_seg &= add_condition (summary, params_summary, index, + param_type, &aggpos, NE_EXPR, + min, param_ops); + else + { + /* Do not create sub-predicate for range that is beyond low bound + of switch index. */ + if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type))) + { + p_seg &= add_condition (summary, params_summary, index, + param_type, &aggpos, + LT_EXPR, min, param_ops); + p_all = p_all.or_with (summary->conds, p_seg); + } + + /* Do not create sub-predicate for range that is beyond up bound + of switch index. */ + if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type))) + { + p_seg = false; + break; + } + + p_seg = add_condition (summary, params_summary, index, + param_type, &aggpos, GT_EXPR, + max, param_ops); + } + } + + p_all = p_all.or_with (summary->conds, p_seg); + *(class predicate *) e->aux + = p_all.or_with (summary->conds, *(class predicate *) e->aux); + + vec_free (param_ops); } @@ -1314,7 +1611,8 @@ set_switch_stmt_execution_predicate (str static void compute_bb_predicates (struct ipa_func_body_info *fbi, struct cgraph_node *node, - struct ipa_fn_summary *summary) + class ipa_fn_summary *summary, + class ipa_node_params *params_summary) { struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); bool done = false; @@ -1322,8 +1620,8 @@ compute_bb_predicates (struct ipa_func_b FOR_EACH_BB_FN (bb, my_function) { - set_cond_stmt_execution_predicate (fbi, summary, bb); - set_switch_stmt_execution_predicate (fbi, summary, bb); + set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb); + set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb); } /* Entry block is always executable. 
*/ @@ -1348,16 +1646,16 @@ compute_bb_predicates (struct ipa_func_b predicate this_bb_predicate = *(predicate *) e->src->aux; if (e->aux) - this_bb_predicate &= (*(struct predicate *) e->aux); + this_bb_predicate &= (*(class predicate *) e->aux); p = p.or_with (summary->conds, this_bb_predicate); if (p == true) break; } } - if (p == false) - gcc_checking_assert (!bb->aux); - else + if (p != false) { + basic_block pdom_bb; + if (!bb->aux) { done = false; @@ -1376,6 +1674,34 @@ compute_bb_predicates (struct ipa_func_b *((predicate *) bb->aux) = p; } } + + /* For switch/if statement, we can OR-combine predicates of all + its cases/branches to get predicate for basic block in their + convergence point, but sometimes this will generate very + complicated predicate. Actually, we can get simplified + predicate in another way by using the fact that predicate + for a basic block must also hold true for its post dominators. + To be specific, basic block in convergence point of + conditional statement should include predicate of the + statement. */ + pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); + if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) + ; + else if (!pdom_bb->aux) + { + done = false; + pdom_bb->aux = edge_predicate_pool.allocate (); + *((predicate *) pdom_bb->aux) = p; + } + else if (p != *(predicate *) pdom_bb->aux) + { + p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); + if (p != *(predicate *) pdom_bb->aux) + { + done = false; + *((predicate *) pdom_bb->aux) = p; + } + } } } } @@ -1387,21 +1713,21 @@ compute_bb_predicates (struct ipa_func_b static predicate will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi, - struct ipa_fn_summary *summary, + class ipa_fn_summary *summary, + class ipa_node_params *params_summary, tree expr, vec nonconstant_names) { tree parm; int index; - HOST_WIDE_INT size; while (UNARY_CLASS_P (expr)) expr = TREE_OPERAND (expr, 0); - parm = unmodified_parm (fbi, NULL, expr, &size); + parm = unmodified_parm (fbi, NULL, expr, NULL); if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) - return add_condition (summary, index, size, NULL, predicate::changed, - NULL_TREE); + return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL, + predicate::changed, NULL_TREE); if (is_gimple_min_invariant (expr)) return false; if (TREE_CODE (expr) == SSA_NAME) @@ -1410,6 +1736,7 @@ will_be_nonconstant_expr_predicate (ipa_ { predicate p1 = will_be_nonconstant_expr_predicate (fbi, summary, + params_summary, TREE_OPERAND (expr, 0), nonconstant_names); if (p1 == true) @@ -1417,6 +1744,7 @@ will_be_nonconstant_expr_predicate (ipa_ predicate p2 = will_be_nonconstant_expr_predicate (fbi, summary, + params_summary, TREE_OPERAND (expr, 1), nonconstant_names); return p1.or_with (summary->conds, p2); @@ -1425,6 +1753,7 @@ will_be_nonconstant_expr_predicate (ipa_ { predicate p1 = will_be_nonconstant_expr_predicate (fbi, summary, + params_summary, TREE_OPERAND (expr, 0), nonconstant_names); if (p1 == true) @@ -1432,12 +1761,14 @@ will_be_nonconstant_expr_predicate (ipa_ predicate p2 = will_be_nonconstant_expr_predicate (fbi, summary, + params_summary, TREE_OPERAND (expr, 1), nonconstant_names); if (p2 == true) return p2; p1 = p1.or_with (summary->conds, p2); p2 = will_be_nonconstant_expr_predicate (fbi, summary, + params_summary, TREE_OPERAND (expr, 2), nonconstant_names); return p2.or_with (summary->conds, p1); @@ -1458,17 +1789,18 @@ will_be_nonconstant_expr_predicate (ipa_ static predicate 
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, - struct ipa_fn_summary *summary, + class ipa_fn_summary *summary, + class ipa_node_params *params_summary, gimple *stmt, vec nonconstant_names) { predicate p = true; ssa_op_iter iter; tree use; + tree param_type = NULL_TREE; predicate op_non_const; bool is_load; int base_index; - HOST_WIDE_INT size; struct agg_position_info aggpos; /* What statments might be optimized away @@ -1489,11 +1821,9 @@ will_be_nonconstant_predicate (struct ip /* Loads can be optimized when the value is known. */ if (is_load) { - tree op; - gcc_assert (gimple_assign_single_p (stmt)); - op = gimple_assign_rhs1 (stmt); - if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size, - &aggpos)) + tree op = gimple_assign_rhs1 (stmt); + if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type, + &aggpos)) return p; } else @@ -1518,21 +1848,22 @@ will_be_nonconstant_predicate (struct ip if (is_load) op_non_const = - add_condition (summary, base_index, size, &aggpos, predicate::changed, - NULL); + add_condition (summary, params_summary, + base_index, param_type, &aggpos, + predicate::changed, NULL_TREE); else op_non_const = false; FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) { - HOST_WIDE_INT size; - tree parm = unmodified_parm (fbi, stmt, use, &size); + tree parm = unmodified_parm (fbi, stmt, use, NULL); int index; if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) { if (index != base_index) - p = add_condition (summary, index, size, NULL, predicate::changed, - NULL_TREE); + p = add_condition (summary, params_summary, index, + TREE_TYPE (parm), NULL, + predicate::changed, NULL_TREE); else continue; } @@ -1566,7 +1897,7 @@ struct record_modified_bb_info static basic_block get_minimal_bb (basic_block init_bb, basic_block use_bb) { - struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); + class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); if (l && l->header->count < init_bb->count) return l->header; return init_bb; @@ -1664,7 +1995,7 @@ param_change_prob (ipa_func_body_info *f return REG_BR_PROB_BASE; if (dump_file) { - fprintf (dump_file, " Analyzing param change probablity of "); + fprintf (dump_file, " Analyzing param change probability of "); print_generic_expr (dump_file, op, TDF_SLIM); fprintf (dump_file, "\n"); } @@ -1718,7 +2049,9 @@ param_change_prob (ipa_func_body_info *f static bool phi_result_unknown_predicate (ipa_func_body_info *fbi, - ipa_fn_summary *summary, basic_block bb, + ipa_fn_summary *summary, + class ipa_node_params *params_summary, + basic_block bb, predicate *p, vec nonconstant_names) { @@ -1762,7 +2095,7 @@ phi_result_unknown_predicate (ipa_func_b || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) return false; - *p = will_be_nonconstant_expr_predicate (fbi, summary, + *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary, gimple_cond_lhs (stmt), nonconstant_names); if (*p == true) @@ -1777,7 +2110,7 @@ phi_result_unknown_predicate (ipa_func_b NONCONSTANT_NAMES, if possible. 
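
The recursion above can be summarized as: an expression stays constant after specialization unless one of the parameters it is computed from changes, so its predicate is the OR of per-parameter "changed" conditions collected over the leaves. A minimal model with made-up types follows (the real code also returns an always-true predicate for operands it cannot trace back to a parameter, and tracks aggregate positions; both are left out here).

#include <cstdio>
#include <set>

/* Leaves are a parameter index (param >= 0) or a constant (param < 0
   with null operands); inner nodes combine two subexpressions.  */
struct expr
{
  int param;
  const expr *op0, *op1;
};

/* A predicate modeled as the set of parameters whose change makes the
   expression nonconstant (an OR of "param i changed" conditions).  */
typedef std::set<int> predicate;

static predicate
will_be_nonconstant (const expr *e)
{
  predicate p;
  if (!e)
    return p;                  /* constant: never nonconstant */
  if (e->param >= 0)
    {
      p.insert (e->param);     /* "param changed" condition */
      return p;
    }
  predicate p1 = will_be_nonconstant (e->op0);
  predicate p2 = will_be_nonconstant (e->op1);
  p1.insert (p2.begin (), p2.end ());   /* p1.or_with (p2) */
  return p1;
}

int
main ()
{
  expr a = { 0, nullptr, nullptr };     /* parameter 0 */
  expr b = { 2, nullptr, nullptr };     /* parameter 2 */
  expr sum = { -1, &a, &b };            /* a + b */
  expr e = { -1, &sum, nullptr };       /* (a + b) * constant */
  for (int i : will_be_nonconstant (&e))
    std::printf ("changed (param %d)\n", i);
  return 0;
}
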
*/ static void -predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi, +predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi, predicate *p, vec nonconstant_names) { @@ -1954,7 +2287,8 @@ analyze_function_body (struct cgraph_nod basic_block bb; struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); sreal freq; - struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node); predicate bb_predicate; struct ipa_func_body_info fbi; vec nonconstant_names = vNULL; @@ -1980,6 +2314,7 @@ analyze_function_body (struct cgraph_nod if (opt_for_fn (node->decl, optimize)) { calculate_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_POST_DOMINATORS); if (!early) loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); else @@ -2019,7 +2354,7 @@ analyze_function_body (struct cgraph_nod bb_predicate); if (fbi.info) - compute_bb_predicates (&fbi, node, info); + compute_bb_predicates (&fbi, node, info, params_summary); order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); nblocks = pre_and_rev_post_order_compute (NULL, order, false); for (n = 0; n < nblocks; n++) @@ -2061,7 +2396,9 @@ analyze_function_body (struct cgraph_nod gsi_next (&bsi)) { if (first_phi - && !phi_result_unknown_predicate (&fbi, info, bb, + && !phi_result_unknown_predicate (&fbi, info, + params_summary, + bb, &phi_predicate, nonconstant_names)) break; @@ -2159,7 +2496,7 @@ analyze_function_body (struct cgraph_nod just maximum of the possible paths. */ if (fbi.info) will_be_nonconstant - = will_be_nonconstant_predicate (&fbi, info, + = will_be_nonconstant_predicate (&fbi, info, params_summary, stmt, nonconstant_names); else will_be_nonconstant = true; @@ -2174,7 +2511,7 @@ analyze_function_body (struct cgraph_nod if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); - struct predicate p = bb_predicate & will_be_nonconstant; + class predicate p = bb_predicate & will_be_nonconstant; /* We can ignore statement when we proved it is never going to happen, but we cannot do that for call statements @@ -2226,7 +2563,8 @@ analyze_function_body (struct cgraph_nod predicate p = bb_predicate; if (fbi.info) p = p & will_be_nonconstant_expr_predicate - (&fbi, info, TREE_OPERAND (op, 1), + (&fbi, info, params_summary, + TREE_OPERAND (op, 1), nonconstant_names); if (p != false) { @@ -2249,7 +2587,7 @@ analyze_function_body (struct cgraph_nod if (nonconstant_names.exists () && !early) { - struct loop *loop; + class loop *loop; predicate loop_iterations = true; predicate loop_stride = true; @@ -2261,7 +2599,7 @@ analyze_function_body (struct cgraph_nod vec exits; edge ex; unsigned int j; - struct tree_niter_desc niter_desc; + class tree_niter_desc niter_desc; bb_predicate = *(predicate *) loop->header->aux; exits = get_loop_exit_edges (loop); @@ -2271,6 +2609,7 @@ analyze_function_body (struct cgraph_nod { predicate will_be_nonconstant = will_be_nonconstant_expr_predicate (&fbi, info, + params_summary, niter_desc.niter, nonconstant_names); if (will_be_nonconstant != true) @@ -2315,7 +2654,9 @@ analyze_function_body (struct cgraph_nod continue; predicate will_be_nonconstant - = will_be_nonconstant_expr_predicate (&fbi, info, iv.step, + = will_be_nonconstant_expr_predicate (&fbi, info, + params_summary, + iv.step, nonconstant_names); if (will_be_nonconstant != true) will_be_nonconstant = 
bb_predicate & will_be_nonconstant; @@ -2349,8 +2690,9 @@ analyze_function_body (struct cgraph_nod } } ipa_fn_summary *s = ipa_fn_summaries->get (node); + ipa_size_summary *ss = ipa_size_summaries->get (node); s->time = time; - s->self_size = size; + ss->self_size = size; nonconstant_names.release (); ipa_release_body_info (&fbi); if (opt_for_fn (node->decl, optimize)) @@ -2360,6 +2702,7 @@ analyze_function_body (struct cgraph_nod else if (!ipa_edge_args_sum) ipa_free_all_node_params (); free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); } if (dump_file) { @@ -2377,9 +2720,8 @@ compute_fn_summary (struct cgraph_node * { HOST_WIDE_INT self_stack_size; struct cgraph_edge *e; - struct ipa_fn_summary *info; - gcc_assert (!node->global.inlined_to); + gcc_assert (!node->inlined_to); if (!ipa_fn_summaries) ipa_fn_summary_alloc (); @@ -2387,14 +2729,14 @@ compute_fn_summary (struct cgraph_node * /* Create a new ipa_fn_summary. */ ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); ipa_fn_summaries->remove (node); - info = ipa_fn_summaries->get_create (node); + class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); /* Estimate the stack size for the function if we're optimizing. */ self_stack_size = optimize && !node->thunk.thunk_p ? estimated_stack_frame_size (node) : 0; - info->estimated_self_stack_size = self_stack_size; + size_info->estimated_self_stack_size = self_stack_size; info->estimated_stack_size = self_stack_size; - info->stack_frame_offset = 0; if (node->thunk.thunk_p) { @@ -2412,7 +2754,7 @@ compute_fn_summary (struct cgraph_node * t = predicate::not_inlined (); info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); ipa_update_overall_fn_summary (node); - info->self_size = info->size; + size_info->self_size = size_info->size; if (stdarg_p (TREE_TYPE (node->decl))) { info->inlinable = false; @@ -2468,16 +2810,15 @@ compute_fn_summary (struct cgraph_node * node->calls_comdat_local = (e != NULL); /* Inlining characteristics are maintained by the cgraph_mark_inline. */ - info->size = info->self_size; - info->stack_frame_offset = 0; - info->estimated_stack_size = info->estimated_self_stack_size; + size_info->size = size_info->self_size; + info->estimated_stack_size = size_info->estimated_self_stack_size; /* Code above should compute exactly the same result as ipa_update_overall_fn_summary but because computation happens in different order the roundoff errors result in slight changes. */ ipa_update_overall_fn_summary (node); /* In LTO mode we may have speculative edges set. 
*/ - gcc_assert (in_lto_p || info->size == info->self_size); + gcc_assert (in_lto_p || size_info->size == size_info->self_size); } @@ -2499,11 +2840,11 @@ estimate_edge_devirt_benefit (struct cgr int *size, int *time, vec known_vals, vec known_contexts, - vec known_aggs) + vec known_aggs) { tree target; struct cgraph_node *callee; - struct ipa_fn_summary *isummary; + class ipa_fn_summary *isummary; enum availability avail; bool speculative; @@ -2548,10 +2889,10 @@ estimate_edge_size_and_time (struct cgra int prob, vec known_vals, vec known_contexts, - vec known_aggs, + vec known_aggs, ipa_hints *hints) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); int call_size = es->call_stmt_size; int call_time = es->call_stmt_time; int cur_size; @@ -2583,12 +2924,12 @@ estimate_calls_size_and_time (struct cgr clause_t possible_truths, vec known_vals, vec known_contexts, - vec known_aggs) + vec known_aggs) { struct cgraph_edge *e; for (e = node->callees; e; e = e->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get_create (e); + class ipa_call_summary *es = ipa_call_summaries->get_create (e); /* Do not care about zero sized builtins. */ if (e->inline_failed && !es->call_stmt_size) @@ -2619,7 +2960,7 @@ estimate_calls_size_and_time (struct cgr } for (e = node->indirect_calls; e; e = e->next_callee) { - struct ipa_call_summary *es = ipa_call_summaries->get_create (e); + class ipa_call_summary *es = ipa_call_summaries->get_create (e); if (!es->predicate || es->predicate->evaluate (possible_truths)) estimate_edge_size_and_time (e, size, @@ -2630,31 +2971,250 @@ estimate_calls_size_and_time (struct cgr } } +/* Default constructor for ipa call context. + Memory alloction of known_vals, known_contexts + and known_aggs vectors is owned by the caller, but can + be release by ipa_call_context::release. + + inline_param_summary is owned by the caller. */ +ipa_call_context::ipa_call_context (cgraph_node *node, + clause_t possible_truths, + clause_t nonspec_possible_truths, + vec known_vals, + vec + known_contexts, + vec known_aggs, + vec + inline_param_summary) +: m_node (node), m_possible_truths (possible_truths), + m_nonspec_possible_truths (nonspec_possible_truths), + m_inline_param_summary (inline_param_summary), + m_known_vals (known_vals), + m_known_contexts (known_contexts), + m_known_aggs (known_aggs) +{ +} + +/* Set THIS to be a duplicate of CTX. Copy all relevant info. */ + +void +ipa_call_context::duplicate_from (const ipa_call_context &ctx) +{ + m_node = ctx.m_node; + m_possible_truths = ctx.m_possible_truths; + m_nonspec_possible_truths = ctx.m_nonspec_possible_truths; + class ipa_node_params *params_summary = IPA_NODE_REF (m_node); + unsigned int nargs = params_summary + ? ipa_get_param_count (params_summary) : 0; + + m_inline_param_summary = vNULL; + /* Copy the info only if there is at least one useful entry. 
*/ + if (ctx.m_inline_param_summary.exists ()) + { + unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs); + + for (unsigned int i = 0; i < n; i++) + if (ipa_is_param_used_by_ipa_predicates (params_summary, i) + && !ctx.m_inline_param_summary[i].useless_p ()) + { + m_inline_param_summary + = ctx.m_inline_param_summary.copy (); + break; + } + } + m_known_vals = vNULL; + if (ctx.m_known_vals.exists ()) + { + unsigned int n = MIN (ctx.m_known_vals.length (), nargs); + + for (unsigned int i = 0; i < n; i++) + if (ipa_is_param_used_by_indirect_call (params_summary, i) + && ctx.m_known_vals[i]) + { + m_known_vals = ctx.m_known_vals.copy (); + break; + } + } + + m_known_contexts = vNULL; + if (ctx.m_known_contexts.exists ()) + { + unsigned int n = MIN (ctx.m_known_contexts.length (), nargs); + + for (unsigned int i = 0; i < n; i++) + if (ipa_is_param_used_by_polymorphic_call (params_summary, i) + && !ctx.m_known_contexts[i].useless_p ()) + { + m_known_contexts = ctx.m_known_contexts.copy (); + break; + } + } + + m_known_aggs = vNULL; + if (ctx.m_known_aggs.exists ()) + { + unsigned int n = MIN (ctx.m_known_aggs.length (), nargs); + + for (unsigned int i = 0; i < n; i++) + if (ipa_is_param_used_by_indirect_call (params_summary, i) + && !ctx.m_known_aggs[i].is_empty ()) + { + m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs); + break; + } + } +} + +/* Release memory used by known_vals/contexts/aggs vectors. + If ALL is true release also inline_param_summary. + This happens when context was previously duplciated to be stored + into cache. */ + +void +ipa_call_context::release (bool all) +{ + /* See if context is initialized at first place. */ + if (!m_node) + return; + m_known_vals.release (); + m_known_contexts.release (); + ipa_release_agg_values (m_known_aggs); + if (all) + m_inline_param_summary.release (); +} + +/* Return true if CTX describes the same call context as THIS. */ + +bool +ipa_call_context::equal_to (const ipa_call_context &ctx) +{ + if (m_node != ctx.m_node + || m_possible_truths != ctx.m_possible_truths + || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths) + return false; + + class ipa_node_params *params_summary = IPA_NODE_REF (m_node); + unsigned int nargs = params_summary + ? 
ipa_get_param_count (params_summary) : 0; + + if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ()) + { + for (unsigned int i = 0; i < nargs; i++) + { + if (!ipa_is_param_used_by_ipa_predicates (params_summary, i)) + continue; + if (i >= m_inline_param_summary.length () + || m_inline_param_summary[i].useless_p ()) + { + if (i < ctx.m_inline_param_summary.length () + && !ctx.m_inline_param_summary[i].useless_p ()) + return false; + continue; + } + if (i >= ctx.m_inline_param_summary.length () + || ctx.m_inline_param_summary[i].useless_p ()) + { + if (i < m_inline_param_summary.length () + && !m_inline_param_summary[i].useless_p ()) + return false; + continue; + } + if (!m_inline_param_summary[i].equal_to + (ctx.m_inline_param_summary[i])) + return false; + } + } + if (m_known_vals.exists () || ctx.m_known_vals.exists ()) + { + for (unsigned int i = 0; i < nargs; i++) + { + if (!ipa_is_param_used_by_indirect_call (params_summary, i)) + continue; + if (i >= m_known_vals.length () || !m_known_vals[i]) + { + if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i]) + return false; + continue; + } + if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i]) + { + if (i < m_known_vals.length () && m_known_vals[i]) + return false; + continue; + } + if (m_known_vals[i] != ctx.m_known_vals[i]) + return false; + } + } + if (m_known_contexts.exists () || ctx.m_known_contexts.exists ()) + { + for (unsigned int i = 0; i < nargs; i++) + { + if (!ipa_is_param_used_by_polymorphic_call (params_summary, i)) + continue; + if (i >= m_known_contexts.length () + || m_known_contexts[i].useless_p ()) + { + if (i < ctx.m_known_contexts.length () + && !ctx.m_known_contexts[i].useless_p ()) + return false; + continue; + } + if (i >= ctx.m_known_contexts.length () + || ctx.m_known_contexts[i].useless_p ()) + { + if (i < m_known_contexts.length () + && !m_known_contexts[i].useless_p ()) + return false; + continue; + } + if (!m_known_contexts[i].equal_to + (ctx.m_known_contexts[i])) + return false; + } + } + if (m_known_aggs.exists () || ctx.m_known_aggs.exists ()) + { + for (unsigned int i = 0; i < nargs; i++) + { + if (!ipa_is_param_used_by_indirect_call (params_summary, i)) + continue; + if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ()) + { + if (i < ctx.m_known_aggs.length () + && !ctx.m_known_aggs[i].is_empty ()) + return false; + continue; + } + if (i >= ctx.m_known_aggs.length () + || ctx.m_known_aggs[i].is_empty ()) + { + if (i < m_known_aggs.length () + && !m_known_aggs[i].is_empty ()) + return false; + continue; + } + if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i])) + return false; + } + } + return true; +} -/* Estimate size and time needed to execute NODE assuming - POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS - information about NODE's arguments. If non-NULL use also probability - information present in INLINE_PARAM_SUMMARY vector. +/* Estimate size and time needed to execute call in the given context. Additionally detemine hints determined by the context. Finally compute minimal size needed for the call that is independent on the call context and can be used for fast estimates. Return the values in RET_SIZE, RET_MIN_SIZE, RET_TIME and RET_HINTS. 
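
Note how equal_to pads with emptiness: a slot is ignored when it is missing or carries no information on both sides, so vectors of different lengths can still describe the same context. A condensed standalone rendition of that comparison (0 standing in for an unknown value; the per-parameter used-by-indirect-call and used-by-polymorphic-call filters are omitted):

#include <cstdio>
#include <vector>

/* A slot carries no information when it is out of range or unknown.  */
static bool
useless_p (const std::vector<int> &v, unsigned i)
{
  return i >= v.size () || v[i] == 0;
}

/* Compare two known-value vectors over NARGS parameter slots the way
   ipa_call_context::equal_to does: useless slots on both sides match,
   a useless slot against a real value does not, and real values must
   be equal.  */
static bool
contexts_equal_p (const std::vector<int> &a, const std::vector<int> &b,
                  unsigned nargs)
{
  for (unsigned i = 0; i < nargs; i++)
    {
      if (useless_p (a, i) != useless_p (b, i))
        return false;
      if (!useless_p (a, i) && a[i] != b[i])
        return false;
    }
  return true;
}

int
main ()
{
  std::vector<int> a = { 4, 0 };
  std::vector<int> b = { 4 };       /* shorter, but slot 1 is useless */
  std::vector<int> c = { 4, 7 };
  std::printf ("%d %d\n", contexts_equal_p (a, b, 2),
               contexts_equal_p (a, c, 2));   /* prints: 1 0 */
  return 0;
}
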
*/ void -estimate_node_size_and_time (struct cgraph_node *node, - clause_t possible_truths, - clause_t nonspec_possible_truths, - vec known_vals, - vec known_contexts, - vec known_aggs, - int *ret_size, int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints, - vec - inline_param_summary) +ipa_call_context::estimate_size_and_time (int *ret_size, + int *ret_min_size, + sreal *ret_time, + sreal *ret_nonspecialized_time, + ipa_hints *ret_hints) { - struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node); size_time_entry *e; int size = 0; sreal time = 0; @@ -2666,13 +3226,13 @@ estimate_node_size_and_time (struct cgra { bool found = false; fprintf (dump_file, " Estimating body: %s/%i\n" - " Known to be false: ", node->name (), - node->order); + " Known to be false: ", m_node->name (), + m_node->order); for (i = predicate::not_inlined_condition; i < (predicate::first_dynamic_condition + (int) vec_safe_length (info->conds)); i++) - if (!(possible_truths & (1 << i))) + if (!(m_possible_truths & (1 << i))) { if (found) fprintf (dump_file, ", "); @@ -2681,19 +3241,19 @@ estimate_node_size_and_time (struct cgra } } - estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths, - known_vals, known_contexts, known_aggs); + estimate_calls_size_and_time (m_node, &size, &min_size, &time, &hints, m_possible_truths, + m_known_vals, m_known_contexts, m_known_aggs); sreal nonspecialized_time = time; for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) { - bool exec = e->exec_predicate.evaluate (nonspec_possible_truths); + bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths); /* Because predicates are conservative, it can happen that nonconst is 1 but exec is 0. 
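
In the accounting loop that follows, exec means the entry is reached at all in this context while nonconst means it survives specialization, and only exec gates the bookkeeping. A toy rendition of the rules (hypothetical entries; the probabilistic scaling by inline_param_summary is left out):

#include <cstdio>

struct size_time_entry
{
  int size;
  double time;
  bool exec, nonconst;   /* already-evaluated predicates */
};

int
main ()
{
  size_time_entry table[] = {
    { 4, 2.0, true, true },    /* survives specialization */
    { 6, 3.0, true, false },   /* reached, but folds away */
    { 8, 5.0, false, true },   /* proven unreachable: conservatively
                                  nonconst, but exec wins */
  };
  int size = 0;
  double time = 0, nonspec_time = 0;
  for (const size_time_entry &e : table)
    if (e.exec)
      {
        nonspec_time += e.time;  /* the unspecialized copy pays it */
        if (e.nonconst)
          {
            size += e.size;      /* size counted only if kept */
            time += e.time;
          }
      }
  std::printf ("size=%d time=%.1f nonspec=%.1f\n",
               size, time, nonspec_time);  /* size=4 time=2.0 nonspec=5.0 */
  return 0;
}
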
*/ if (exec) { - bool nonconst = e->nonconst_predicate.evaluate (possible_truths); + bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths); gcc_checking_assert (e->time >= 0); gcc_checking_assert (time >= 0); @@ -2709,7 +3269,7 @@ estimate_node_size_and_time (struct cgra nonspecialized_time += e->time; if (!nonconst) ; - else if (!inline_param_summary.exists ()) + else if (!m_inline_param_summary.exists ()) { if (nonconst) time += e->time; @@ -2717,8 +3277,8 @@ estimate_node_size_and_time (struct cgra else { int prob = e->nonconst_predicate.probability - (info->conds, possible_truths, - inline_param_summary); + (info->conds, m_possible_truths, + m_inline_param_summary); gcc_checking_assert (prob >= 0); gcc_checking_assert (prob <= REG_BR_PROB_BASE); time += e->time * prob / REG_BR_PROB_BASE; @@ -2742,14 +3302,14 @@ estimate_node_size_and_time (struct cgra time = nonspecialized_time; if (info->loop_iterations - && !info->loop_iterations->evaluate (possible_truths)) + && !info->loop_iterations->evaluate (m_possible_truths)) hints |= INLINE_HINT_loop_iterations; if (info->loop_stride - && !info->loop_stride->evaluate (possible_truths)) + && !info->loop_stride->evaluate (m_possible_truths)) hints |= INLINE_HINT_loop_stride; if (info->scc_no) hints |= INLINE_HINT_in_scc; - if (DECL_DECLARED_INLINE_P (node->decl)) + if (DECL_DECLARED_INLINE_P (m_node->decl)) hints |= INLINE_HINT_declared_inline; size = RDIV (size, ipa_fn_summary::size_scale); @@ -2782,7 +3342,7 @@ estimate_ipcp_clone_size_and_time (struc vec known_vals, vec known_contexts, - vec known_aggs, + vec known_aggs, int *ret_size, sreal *ret_time, sreal *ret_nonspec_time, ipa_hints *hints) @@ -2791,10 +3351,31 @@ estimate_ipcp_clone_size_and_time (struc evaluate_conditions_for_known_args (node, false, known_vals, known_aggs, &clause, &nonspec_clause); - estimate_node_size_and_time (node, clause, nonspec_clause, - known_vals, known_contexts, - known_aggs, ret_size, NULL, ret_time, - ret_nonspec_time, hints, vNULL); + ipa_call_context ctx (node, clause, nonspec_clause, + known_vals, known_contexts, + known_aggs, vNULL); + ctx.estimate_size_and_time (ret_size, NULL, ret_time, + ret_nonspec_time, hints); +} + +/* Return stack frame offset where frame of NODE is supposed to start inside + of the function it is inlined to. + Return 0 for functions that are not inlined. 
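
This walk is the replacement for the stack_frame_offset field that the series deletes from ipa_fn_summary: instead of caching the offset, it is recomputed by summing the self stack sizes of every function the clone was inlined into. A self-contained model of the same walk (made-up node type, not the cgraph API):

#include <cstdio>

struct node
{
  long self_stack_size;
  node *inlined_into;   /* plays the role of callers->caller for
                           inline clones; null for offline bodies */
};

/* The frame of an inline clone starts after the frames of everything
   it was inlined into, so the offset is the sum of the enclosing
   functions' own stack sizes (0 for a function not inlined).  */
static long
stack_frame_offset (const node *n)
{
  long offset = 0;
  for (const node *p = n->inlined_into; p; p = p->inlined_into)
    offset += p->self_stack_size;
  return offset;
}

int
main ()
{
  node c = { 64, nullptr };   /* offline function */
  node b = { 32, &c };        /* b inlined into c */
  node a = { 16, &b };        /* a inlined into b */
  std::printf ("%ld\n", stack_frame_offset (&a));   /* prints: 96 */
  return 0;
}
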
*/ + +HOST_WIDE_INT +ipa_get_stack_frame_offset (struct cgraph_node *node) +{ + HOST_WIDE_INT offset = 0; + if (!node->inlined_to) + return 0; + node = node->callers->caller; + while (true) + { + offset += ipa_size_summaries->get (node)->estimated_self_stack_size; + if (!node->inlined_to) + return offset; + node = node->callers->caller; + } } @@ -2805,19 +3386,7 @@ static void inline_update_callee_summaries (struct cgraph_node *node, int depth) { struct cgraph_edge *e; - ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); - ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); - HOST_WIDE_INT peak; - - callee_info->stack_frame_offset - = caller_info->stack_frame_offset - + caller_info->estimated_self_stack_size; - peak = callee_info->stack_frame_offset - + callee_info->estimated_self_stack_size; - - ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to); - if (s->estimated_stack_size < peak) - s->estimated_stack_size = peak; + ipa_propagate_frequency (node); for (e = node->callees; e; e = e->next_callee) { @@ -2830,7 +3399,7 @@ inline_update_callee_summaries (struct c } /* Update change_prob of EDGE after INLINED_EDGE has been inlined. - When functoin A is inlined in B and A calls C with parameter that + When function A is inlined in B and A calls C with parameter that changes with probability PROB1 and C is known to be passthroug of argument if B that change with probability PROB2, the probability of change is now PROB1*PROB2. */ @@ -2842,9 +3411,11 @@ remap_edge_change_prob (struct cgraph_ed if (ipa_node_params_sum) { int i; - struct ipa_edge_args *args = IPA_EDGE_REF (edge); - struct ipa_call_summary *es = ipa_call_summaries->get (edge); - struct ipa_call_summary *inlined_es + class ipa_edge_args *args = IPA_EDGE_REF (edge); + if (!args) + return; + class ipa_call_summary *es = ipa_call_summaries->get (edge); + class ipa_call_summary *inlined_es = ipa_call_summaries->get (inlined_edge); if (es->param.length () == 0) @@ -2885,8 +3456,9 @@ remap_edge_change_prob (struct cgraph_ed static void remap_edge_summaries (struct cgraph_edge *inlined_edge, struct cgraph_node *node, - struct ipa_fn_summary *info, - struct ipa_fn_summary *callee_info, + class ipa_fn_summary *info, + class ipa_node_params *params_summary, + class ipa_fn_summary *callee_info, vec operand_map, vec offset_map, clause_t possible_truths, @@ -2895,18 +3467,19 @@ remap_edge_summaries (struct cgraph_edge struct cgraph_edge *e, *next; for (e = node->callees; e; e = next) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); predicate p; next = e->next_callee; if (e->inline_failed) { + class ipa_call_summary *es = ipa_call_summaries->get (e); remap_edge_change_prob (inlined_edge, e); if (es->predicate) { p = es->predicate->remap_after_inlining - (info, callee_info, operand_map, + (info, params_summary, + callee_info, operand_map, offset_map, possible_truths, *toplev_predicate); edge_set_predicate (e, &p); @@ -2915,13 +3488,14 @@ remap_edge_summaries (struct cgraph_edge edge_set_predicate (e, toplev_predicate); } else - remap_edge_summaries (inlined_edge, e->callee, info, callee_info, + remap_edge_summaries (inlined_edge, e->callee, info, + params_summary, callee_info, operand_map, offset_map, possible_truths, toplev_predicate); } for (e = node->indirect_calls; e; e = next) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); predicate p; next = e->next_callee; @@ -2929,7 +3503,8 @@ remap_edge_summaries 
(struct cgraph_edge if (es->predicate) { p = es->predicate->remap_after_inlining - (info, callee_info, operand_map, offset_map, + (info, params_summary, + callee_info, operand_map, offset_map, possible_truths, *toplev_predicate); edge_set_predicate (e, &p); } @@ -2941,8 +3516,9 @@ remap_edge_summaries (struct cgraph_edge /* Same as remap_predicate, but set result into hint *HINT. */ static void -remap_hint_predicate (struct ipa_fn_summary *info, - struct ipa_fn_summary *callee_info, +remap_hint_predicate (class ipa_fn_summary *info, + class ipa_node_params *params_summary, + class ipa_fn_summary *callee_info, predicate **hint, vec operand_map, vec offset_map, @@ -2954,7 +3530,7 @@ remap_hint_predicate (struct ipa_fn_summ if (!*hint) return; p = (*hint)->remap_after_inlining - (info, callee_info, + (info, params_summary, callee_info, operand_map, offset_map, possible_truths, *toplev_predicate); if (p != false && p != true) @@ -2972,17 +3548,18 @@ void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) { ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); - struct cgraph_node *to = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to : edge->caller); - struct ipa_fn_summary *info = ipa_fn_summaries->get (to); + struct cgraph_node *to = (edge->caller->inlined_to + ? edge->caller->inlined_to : edge->caller); + class ipa_fn_summary *info = ipa_fn_summaries->get (to); clause_t clause = 0; /* not_inline is known to be false. */ size_time_entry *e; - vec operand_map = vNULL; - vec offset_map = vNULL; + auto_vec operand_map; + auto_vec offset_map; int i; predicate toplev_predicate; - predicate true_p = true; - struct ipa_call_summary *es = ipa_call_summaries->get (edge); + class ipa_call_summary *es = ipa_call_summaries->get (edge); + class ipa_node_params *params_summary = (ipa_node_params_sum + ? IPA_NODE_REF (to) : NULL); if (es->predicate) toplev_predicate = *es->predicate; @@ -2995,8 +3572,8 @@ ipa_merge_fn_summary_after_inlining (str evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL); if (ipa_node_params_sum && callee_info->conds) { - struct ipa_edge_args *args = IPA_EDGE_REF (edge); - int count = ipa_get_cs_argument_count (args); + class ipa_edge_args *args = IPA_EDGE_REF (edge); + int count = args ? 
ipa_get_cs_argument_count (args) : 0; int i; if (count) @@ -3029,19 +3606,21 @@ ipa_merge_fn_summary_after_inlining (str } } operand_map[i] = map; - gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to))); + gcc_assert (map < ipa_get_param_count (params_summary)); } } for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) { predicate p; p = e->exec_predicate.remap_after_inlining - (info, callee_info, operand_map, + (info, params_summary, + callee_info, operand_map, offset_map, clause, toplev_predicate); predicate nonconstp; nonconstp = e->nonconst_predicate.remap_after_inlining - (info, callee_info, operand_map, + (info, params_summary, + callee_info, operand_map, offset_map, clause, toplev_predicate); if (p != false && nonconstp != false) @@ -3059,48 +3638,53 @@ ipa_merge_fn_summary_after_inlining (str info->account_size_time (e->size, add_time, p, nonconstp); } } - remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map, + remap_edge_summaries (edge, edge->callee, info, params_summary, + callee_info, operand_map, offset_map, clause, &toplev_predicate); - remap_hint_predicate (info, callee_info, + remap_hint_predicate (info, params_summary, callee_info, &callee_info->loop_iterations, operand_map, offset_map, clause, &toplev_predicate); - remap_hint_predicate (info, callee_info, + remap_hint_predicate (info, params_summary, callee_info, &callee_info->loop_stride, operand_map, offset_map, clause, &toplev_predicate); - ipa_call_summary *s = ipa_call_summaries->get (edge); - inline_update_callee_summaries (edge->callee, s->loop_depth); + HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee); + HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size; - /* We do not maintain predicates of inlined edges, free it. */ - edge_set_predicate (edge, &true_p); - /* Similarly remove param summaries. */ - es->param.release (); - operand_map.release (); - offset_map.release (); + if (info->estimated_stack_size < peak) + info->estimated_stack_size = peak; + + inline_update_callee_summaries (edge->callee, es->loop_depth); + + /* Free summaries that are not maintained for inline clones/edges. */ + ipa_call_summaries->remove (edge); + ipa_fn_summaries->remove (edge->callee); } -/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size - and time. Recompute it. */ +/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating + overall size and time. Recompute it. */ void ipa_update_overall_fn_summary (struct cgraph_node *node) { - struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); + class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); size_time_entry *e; int i; - info->size = 0; + size_info->size = 0; info->time = 0; for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) { - info->size += e->size; + size_info->size += e->size; info->time += e->time; } - estimate_calls_size_and_time (node, &info->size, &info->min_size, + estimate_calls_size_and_time (node, &size_info->size, &info->min_size, &info->time, NULL, ~(clause_t) (1 << predicate::false_condition), vNULL, vNULL, vNULL); - info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale; + size_info->size = (size_info->size + ipa_fn_summary::size_scale / 2) + / ipa_fn_summary::size_scale; } @@ -3181,10 +3765,10 @@ ipa_fn_summary_generate (void) /* Write inline summary for edge E to OB. 
*/ static void -read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e, +read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e, bool prevails) { - struct ipa_call_summary *es = prevails + class ipa_call_summary *es = prevails ? ipa_call_summaries->get_create (e) : NULL; predicate p; int length, i; @@ -3235,7 +3819,7 @@ inline_read_section (struct lto_file_dec const int cfg_offset = sizeof (struct lto_function_header); const int main_offset = cfg_offset + header->cfg_size; const int string_offset = main_offset + header->main_size; - struct data_in *data_in; + class data_in *data_in; unsigned int i, count2, j; unsigned int f_count; @@ -3250,7 +3834,9 @@ inline_read_section (struct lto_file_dec { unsigned int index; struct cgraph_node *node; - struct ipa_fn_summary *info; + class ipa_fn_summary *info; + class ipa_node_params *params_summary; + class ipa_size_summary *size_info; lto_symtab_encoder_t encoder; struct bitpack_d bp; struct cgraph_edge *e; @@ -3261,6 +3847,9 @@ inline_read_section (struct lto_file_dec node = dyn_cast (lto_symtab_encoder_deref (encoder, index)); info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL; + params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL; + size_info = node->prevailing_p () + ? ipa_size_summaries->get_create (node) : NULL; int stack_size = streamer_read_uhwi (&ib); int size = streamer_read_uhwi (&ib); @@ -3269,8 +3858,8 @@ inline_read_section (struct lto_file_dec if (info) { info->estimated_stack_size - = info->estimated_self_stack_size = stack_size; - info->size = info->self_size = size; + = size_info->estimated_self_stack_size = stack_size; + size_info->size = size_info->self_size = size; info->time = time; } @@ -3288,26 +3877,70 @@ inline_read_section (struct lto_file_dec count2 = streamer_read_uhwi (&ib); gcc_assert (!info || !info->conds); + if (info) + vec_safe_reserve_exact (info->conds, count2); for (j = 0; j < count2; j++) { struct condition c; + unsigned int k, count3; c.operand_num = streamer_read_uhwi (&ib); - c.size = streamer_read_uhwi (&ib); c.code = (enum tree_code) streamer_read_uhwi (&ib); + c.type = stream_read_tree (&ib, data_in); c.val = stream_read_tree (&ib, data_in); bp = streamer_read_bitpack (&ib); c.agg_contents = bp_unpack_value (&bp, 1); c.by_ref = bp_unpack_value (&bp, 1); if (c.agg_contents) c.offset = streamer_read_uhwi (&ib); + count3 = streamer_read_uhwi (&ib); + c.param_ops = NULL; if (info) - vec_safe_push (info->conds, c); + vec_safe_reserve_exact (c.param_ops, count3); + if (params_summary) + ipa_set_param_used_by_ipa_predicates + (params_summary, c.operand_num, true); + for (k = 0; k < count3; k++) + { + struct expr_eval_op op; + enum gimple_rhs_class rhs_class; + op.code = (enum tree_code) streamer_read_uhwi (&ib); + op.type = stream_read_tree (&ib, data_in); + switch (rhs_class = get_gimple_rhs_class (op.code)) + { + case GIMPLE_UNARY_RHS: + op.index = 0; + op.val[0] = NULL_TREE; + op.val[1] = NULL_TREE; + break; + + case GIMPLE_BINARY_RHS: + case GIMPLE_TERNARY_RHS: + bp = streamer_read_bitpack (&ib); + op.index = bp_unpack_value (&bp, 2); + op.val[0] = stream_read_tree (&ib, data_in); + if (rhs_class == GIMPLE_BINARY_RHS) + op.val[1] = NULL_TREE; + else + op.val[1] = stream_read_tree (&ib, data_in); + break; + + default: + fatal_error (UNKNOWN_LOCATION, + "invalid fnsummary in LTO stream"); + } + if (info) + c.param_ops->quick_push (op); + } + if (info) + info->conds->quick_push (c); } count2 = streamer_read_uhwi (&ib); gcc_assert (!info 
|| !info->size_time_table); + if (info && count2) + vec_safe_reserve_exact (info->size_time_table, count2); for (j = 0; j < count2; j++) { - struct size_time_entry e; + class size_time_entry e; e.size = streamer_read_uhwi (&ib); e.time = sreal::stream_in (&ib); @@ -3315,7 +3948,7 @@ inline_read_section (struct lto_file_dec e.nonconst_predicate.stream_in (&ib); if (info) - vec_safe_push (info->size_time_table, e); + info->size_time_table->quick_push (e); } p.stream_in (&ib); @@ -3378,7 +4011,7 @@ ipa_fn_summary_read (void) static void write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) { - struct ipa_call_summary *es = ipa_call_summaries->get (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); int i; streamer_write_uhwi (ob, es->call_stmt_size); @@ -3426,7 +4059,8 @@ ipa_fn_summary_write (void) cgraph_node *cnode = lsei_cgraph_node (lsei); if (cnode->definition && !cnode->alias) { - struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode); + class ipa_fn_summary *info = ipa_fn_summaries->get (cnode); + class ipa_size_summary *size_info = ipa_size_summaries->get (cnode); struct bitpack_d bp; struct cgraph_edge *edge; int i; @@ -3434,8 +4068,8 @@ ipa_fn_summary_write (void) struct condition *c; streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); - streamer_write_hwi (ob, info->estimated_self_stack_size); - streamer_write_hwi (ob, info->self_size); + streamer_write_hwi (ob, size_info->estimated_self_stack_size); + streamer_write_hwi (ob, size_info->self_size); info->time.stream_out (ob); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, info->inlinable, 1); @@ -3445,9 +4079,12 @@ ipa_fn_summary_write (void) streamer_write_uhwi (ob, vec_safe_length (info->conds)); for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) { + int j; + struct expr_eval_op *op; + streamer_write_uhwi (ob, c->operand_num); - streamer_write_uhwi (ob, c->size); streamer_write_uhwi (ob, c->code); + stream_write_tree (ob, c->type, true); stream_write_tree (ob, c->val, true); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, c->agg_contents, 1); @@ -3455,6 +4092,21 @@ ipa_fn_summary_write (void) streamer_write_bitpack (&bp); if (c->agg_contents) streamer_write_uhwi (ob, c->offset); + streamer_write_uhwi (ob, vec_safe_length (c->param_ops)); + for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) + { + streamer_write_uhwi (ob, op->code); + stream_write_tree (ob, op->type, true); + if (op->val[0]) + { + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, op->index, 2); + streamer_write_bitpack (&bp); + stream_write_tree (ob, op->val[0], true); + if (op->val[1]) + stream_write_tree (ob, op->val[1], true); + } + } } streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) @@ -3487,23 +4139,33 @@ ipa_fn_summary_write (void) } -/* Release inline summary. */ +/* Release function summary. */ void ipa_free_fn_summary (void) { - struct cgraph_node *node; if (!ipa_call_summaries) return; - FOR_EACH_DEFINED_FUNCTION (node) - if (!node->alias) - ipa_fn_summaries->remove (node); ipa_fn_summaries->release (); ipa_fn_summaries = NULL; ipa_call_summaries->release (); delete ipa_call_summaries; ipa_call_summaries = NULL; edge_predicate_pool.release (); + /* During IPA this is one of largest datastructures to release. */ + if (flag_wpa) + ggc_trim (); +} + +/* Release function summary. 
*/ + +void +ipa_free_size_summary (void) +{ + if (!ipa_size_summaries) + return; + ipa_size_summaries->release (); + ipa_size_summaries = NULL; } namespace { @@ -3578,10 +4240,12 @@ public: gcc_assert (n == 0); small_p = param; } - virtual bool gate (function *) { return small_p || !flag_wpa; } + virtual bool gate (function *) { return true; } virtual unsigned int execute (function *) { ipa_free_fn_summary (); + if (!flag_wpa) + ipa_free_size_summary (); return 0; } diff -Nurp a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h --- a/gcc/ipa-fnsummary.h 2020-04-30 15:14:04.588000000 +0800 +++ b/gcc/ipa-fnsummary.h 2020-04-30 15:14:56.664000000 +0800 @@ -81,16 +81,40 @@ struct GTY(()) size_time_entry sreal GTY((skip)) time; }; +/* Summary about function and stack frame sizes. We keep this info + for inline clones and also for WPA streaming. For this reason this is not + part of ipa_fn_summary which exists only for offline functions. */ +class ipa_size_summary +{ +public: + /* Estimated stack frame consumption by the function. */ + HOST_WIDE_INT estimated_self_stack_size; + /* Size of the function body. */ + int self_size; + /* Estimated size of the function after inlining. */ + int size; + + ipa_size_summary () + : estimated_self_stack_size (0), self_size (0), size (0) + { + } + /* Copy constructor. */ + ipa_size_summary (const ipa_size_summary &s) + : estimated_self_stack_size (0), self_size (s.self_size), size (s.size) + { + } +}; + /* Function inlining information. */ struct GTY(()) ipa_fn_summary { /* Keep all field empty so summary dumping works during its computation. This is useful for debugging. */ ipa_fn_summary () - : estimated_self_stack_size (0), self_size (0), min_size (0), + : min_size (0), inlinable (false), single_caller (false), fp_expressions (false), estimated_stack_size (false), - stack_frame_offset (false), time (0), size (0), conds (NULL), + time (0), conds (NULL), size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL), growth (0), scc_no (0) { @@ -98,13 +122,11 @@ struct GTY(()) ipa_fn_summary /* Copy constructor. */ ipa_fn_summary (const ipa_fn_summary &s) - : estimated_self_stack_size (s.estimated_self_stack_size), - self_size (s.self_size), min_size (s.min_size), + : min_size (s.min_size), inlinable (s.inlinable), single_caller (s.single_caller), fp_expressions (s.fp_expressions), estimated_stack_size (s.estimated_stack_size), - stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size), - conds (s.conds), size_time_table (s.size_time_table), + time (s.time), conds (s.conds), size_time_table (s.size_time_table), loop_iterations (s.loop_iterations), loop_stride (s.loop_stride), growth (s.growth), scc_no (s.scc_no) {} @@ -114,10 +136,6 @@ struct GTY(()) ipa_fn_summary /* Information about the function body itself. */ - /* Estimated stack frame consumption by the function. */ - HOST_WIDE_INT estimated_self_stack_size; - /* Size of the function body. */ - int self_size; /* Minimal size increase after inlining. */ int min_size; @@ -135,11 +153,8 @@ struct GTY(()) ipa_fn_summary /* Estimated stack frame consumption by the function. */ HOST_WIDE_INT estimated_stack_size; - /* Expected offset of the stack frame of function. */ - HOST_WIDE_INT stack_frame_offset; - /* Estimated size of the function after inlining. */ + /* Estimated runtime of function after inlining. */ sreal GTY((skip)) time; - int size; /* Conditional size/time information. The summaries are being merged during inlining. 
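
The motivation for carrying sizes in the separate ipa_size_summary this hunk introduces is lifetime: the free-fn-summary pass above now frees the big function summaries unconditionally but keeps size summaries alive during WPA, since the inliner and ICF still ask for sizes afterwards. Schematically, with stand-in types and a plain map instead of GCC's function summaries:

#include <cstdio>
#include <map>

struct fn_summary { double time; /* plus predicates, hints, ...  */ };
struct size_summary { int self_size, size; long self_stack_size; };

/* Two independent side tables keyed by a node id, standing in for
   ipa_fn_summaries and ipa_size_summaries.  */
static std::map<int, fn_summary> fn_summaries;
static std::map<int, size_summary> size_summaries;

int
main ()
{
  fn_summaries[7] = { 2.5 };
  size_summaries[7] = { 10, 12, 64 };

  /* ipa_free_fn_summary analogue: times and predicates go away...  */
  fn_summaries.clear ();

  /* ...but sizes remain available to the inliner and ICF.  */
  std::printf ("size = %d\n", size_summaries[7].size);
  return 0;
}
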
*/ @@ -177,7 +192,7 @@ public: static ipa_fn_summary_t *create_ggc (symbol_table *symtab) { - struct ipa_fn_summary_t *summary = new (ggc_alloc ()) + class ipa_fn_summary_t *summary = new (ggc_alloc ()) ipa_fn_summary_t (symtab); summary->disable_insertion_hook (); return summary; @@ -199,6 +214,24 @@ public: extern GTY(()) fast_function_summary *ipa_fn_summaries; +class ipa_size_summary_t: + public fast_function_summary +{ +public: + ipa_size_summary_t (symbol_table *symtab): + fast_function_summary (symtab) {} + + static ipa_size_summary_t *create_ggc (symbol_table *symtab) + { + class ipa_size_summary_t *summary = new (ggc_alloc ()) + ipa_size_summary_t (symtab); + summary->disable_insertion_hook (); + return summary; + } +}; +extern fast_function_summary + *ipa_size_summaries; + /* Information kept about callgraph edges. */ struct ipa_call_summary { @@ -245,6 +278,57 @@ public: ipa_call_summary *dst_data); }; +/* This object describes the context of a call. That is a summary of known + information about its parameters. The main purpose of this context is + to give more realistic estimations of function runtime, size and + inline hints. */ +class ipa_call_context +{ +public: + ipa_call_context (cgraph_node *node, + clause_t possible_truths, + clause_t nonspec_possible_truths, + vec known_vals, + vec known_contexts, + vec known_aggs, + vec m_inline_param_summary); + ipa_call_context () + : m_node(NULL) + { + } + void estimate_size_and_time (int *ret_size, int *ret_min_size, + sreal *ret_time, + sreal *ret_nonspecialized_time, + ipa_hints *ret_hints); + void duplicate_from (const ipa_call_context &ctx); + void release (bool all = false); + bool equal_to (const ipa_call_context &); + bool exists_p () + { + return m_node != NULL; + } +private: + /* Called function. */ + cgraph_node *m_node; + /* Clause describing what predicate conditionals can be satisfied + in this context if function is inlined/specialised. */ + clause_t m_possible_truths; + /* Clause describing what predicate conditionals can be satisfied + in this context if function is kept offline. */ + clause_t m_nonspec_possible_truths; + /* Inline summary maintains info about change probabilities. */ + vec m_inline_param_summary; + + /* The following is used only to resolve indirect calls. */ + + /* Vector describing known values of parameters. */ + vec m_known_vals; + /* Vector describing known polymorphic call contexts. */ + vec m_known_contexts; + /* Vector describing known aggregate values.
*/ + vec m_known_aggs; +}; + extern fast_call_summary *ipa_call_summaries; /* In ipa-fnsummary.c */ @@ -253,11 +337,12 @@ void ipa_dump_fn_summaries (FILE *f); void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node); void ipa_dump_hints (FILE *f, ipa_hints); void ipa_free_fn_summary (void); +void ipa_free_size_summary (void); void inline_analyze_function (struct cgraph_node *node); void estimate_ipcp_clone_size_and_time (struct cgraph_node *, vec, vec, - vec, + vec, int *, sreal *, sreal *, ipa_hints *); void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); @@ -265,26 +350,16 @@ void ipa_update_overall_fn_summary (stru void compute_fn_summary (struct cgraph_node *, bool); -void evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, +void evaluate_properties_for_edge (struct cgraph_edge *e, + bool inline_p, clause_t *clause_ptr, clause_t *nonspec_clause_ptr, vec *known_vals_ptr, vec *known_contexts_ptr, - vec *); -void estimate_node_size_and_time (struct cgraph_node *node, - clause_t possible_truths, - clause_t nonspec_possible_truths, - vec known_vals, - vec, - vec known_aggs, - int *ret_size, int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints, - vec - inline_param_summary); + vec *); void ipa_fnsummary_c_finalize (void); +HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); #endif /* GCC_IPA_FNSUMMARY_H */ diff -Nurp a/gcc/ipa-icf.c b/gcc/ipa-icf.c --- a/gcc/ipa-icf.c 2020-04-30 15:14:04.596000000 +0800 +++ b/gcc/ipa-icf.c 2020-04-30 15:14:56.632000000 +0800 @@ -491,7 +491,7 @@ sem_function::param_used_p (unsigned int struct ipa_node_params *parms_info = IPA_NODE_REF (get_node ()); - if (vec_safe_length (parms_info->descriptors) <= i) + if (!parms_info || vec_safe_length (parms_info->descriptors) <= i) return true; return ipa_is_param_used (IPA_NODE_REF (get_node ()), i); @@ -1149,8 +1149,8 @@ sem_function::merge (sem_item *alias_ite "cannot create wrapper of stdarg function.\n"); } else if (ipa_fn_summaries - && ipa_fn_summaries->get (alias) != NULL - && ipa_fn_summaries->get (alias)->self_size <= 2) + && ipa_size_summaries->get (alias) != NULL + && ipa_size_summaries->get (alias)->self_size <= 2) { if (dump_file) fprintf (dump_file, "Wrapper creation is not " @@ -1268,6 +1268,7 @@ sem_function::merge (sem_item *alias_ite /* Remove the function's body. */ ipa_merge_profiles (original, alias); + symtab->call_cgraph_removal_hooks (alias); alias->release_body (true); alias->reset (); /* Notice global symbol possibly produced RTL. */ @@ -1288,11 +1289,13 @@ sem_function::merge (sem_item *alias_ite { gcc_assert (!create_alias); alias->icf_merged = true; + symtab->call_cgraph_removal_hooks (alias); local_original->icf_merged = true; /* FIXME update local_original counts. */ ipa_merge_profiles (original, alias, true); alias->create_wrapper (local_original); + symtab->call_cgraph_insertion_hooks (alias); if (dump_file) fprintf (dump_file, "Unified; Wrapper has been created.\n\n"); diff -Nurp a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c --- a/gcc/ipa-inline-analysis.c 2020-04-30 15:14:04.556000000 +0800 +++ b/gcc/ipa-inline-analysis.c 2020-04-30 15:14:56.680000000 +0800 @@ -53,6 +53,48 @@ along with GCC; see the file COPYING3. /* Cached node/edge growths. */ call_summary *edge_growth_cache = NULL; +/* The context cache remembers estimated time/size and hints for given + ipa_call_context of a call. 
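
The classes that follow implement what their comment calls a primitive single-entry LRU cache: per node, remember the one most recent (context, result) pair and recompute only when the incoming context differs. Its skeleton, reduced to illustrative key/value types, with the same hit/miss/initialization counters that free_growth_caches dumps:

#include <cstdio>
#include <unordered_map>

struct entry { long key; long value; bool valid; };

static std::unordered_map<int, entry> cache;  /* node id -> last result */
static long cache_hit, cache_miss, cache_clear;

static long
compute (long key)           /* stand-in for the real estimation */
{
  return key * key;
}

static long
cached_estimate (int node, long key)
{
  entry &e = cache[node];    /* get_create */
  if (e.valid && e.key == key)
    {
      cache_hit++;
      return e.value;
    }
  if (e.valid)
    cache_miss++;            /* had an entry, context changed */
  else
    cache_clear++;           /* first use: initialization */
  e = { key, compute (key), true };
  return e.value;
}

int
main ()
{
  cached_estimate (1, 10);
  cached_estimate (1, 10);   /* hit */
  cached_estimate (1, 11);   /* miss; entry is overwritten */
  std::printf ("%ld hits, %ld misses, %ld initializations\n",
               cache_hit, cache_miss, cache_clear);
  return 0;
}
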
*/ +class node_context_cache_entry +{ +public: + ipa_call_context ctx; + sreal time, nonspec_time; + int size; + ipa_hints hints; + + node_context_cache_entry () + : ctx () + { + } + ~node_context_cache_entry () + { + ctx.release (); + } +}; + +/* At the moment we implement a primitive single-entry LRU cache. */ +class node_context_summary +{ +public: + node_context_cache_entry entry; + + node_context_summary () + : entry () + { + } + ~node_context_summary () + { + } +}; + +/* Summary holding the context cache. */ +static fast_function_summary + *node_context_cache = NULL; +/* Statistics about the context cache effectiveness. */ +static long node_context_cache_hit, node_context_cache_miss, + node_context_cache_clear; + /* Give initial reasons why inlining would fail on EDGE. This gets either nullified or usually overwritten by more precise reasons later. */ @@ -77,6 +119,16 @@ initialize_inline_failed (struct cgraph_ == CIF_FINAL_ERROR); } +/* Allocate edge growth caches. */ + +void +initialize_growth_caches () +{ + edge_growth_cache + = new call_summary (symtab, false); + node_context_cache + = new fast_function_summary (symtab); +} /* Free growth caches. */ @@ -84,7 +136,17 @@ void free_growth_caches (void) { delete edge_growth_cache; + delete node_context_cache; edge_growth_cache = NULL; + node_context_cache = NULL; + if (dump_file) + fprintf (dump_file, "node context cache: %li hits, %li misses," + " %li initializations\n", + node_context_cache_hit, node_context_cache_miss, + node_context_cache_clear); + node_context_cache_hit = 0; + node_context_cache_miss = 0; + node_context_cache_clear = 0; } /* Return hints derived from EDGE. */ @@ -93,8 +155,8 @@ int simple_edge_hints (struct cgraph_edge *edge) { int hints = 0; - struct cgraph_node *to = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to : edge->caller); + struct cgraph_node *to = (edge->caller->inlined_to + ? 
edge->caller->inlined_to : edge->caller); struct cgraph_node *callee = edge->callee->ultimate_alias_target (); int to_scc_no = ipa_fn_summaries->get (to)->scc_no; int callee_scc_no = ipa_fn_summaries->get (callee)->scc_no; @@ -127,9 +189,9 @@ do_estimate_edge_time (struct cgraph_edg clause_t clause, nonspec_clause; vec known_vals; vec known_contexts; - vec known_aggs; - struct ipa_call_summary *es = ipa_call_summaries->get (edge); - int min_size; + vec known_aggs; + class ipa_call_summary *es = ipa_call_summaries->get (edge); + int min_size = -1; callee = edge->callee->ultimate_alias_target (); @@ -137,9 +199,53 @@ do_estimate_edge_time (struct cgraph_edg evaluate_properties_for_edge (edge, true, &clause, &nonspec_clause, &known_vals, &known_contexts, &known_aggs); - estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, &size, &min_size, - &time, &nonspec_time, &hints, es->param); + ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, + known_contexts, known_aggs, es->param); + if (node_context_cache != NULL) + { + node_context_summary *e = node_context_cache->get_create (callee); + if (e->entry.ctx.equal_to (ctx)) + { + node_context_cache_hit++; + size = e->entry.size; + time = e->entry.time; + nonspec_time = e->entry.nonspec_time; + hints = e->entry.hints; + if (flag_checking + && !callee->count.ipa_p ()) + { + sreal chk_time, chk_nonspec_time; + int chk_size, chk_min_size; + + ipa_hints chk_hints; + ctx.estimate_size_and_time (&chk_size, &chk_min_size, + &chk_time, &chk_nonspec_time, + &chk_hints); + gcc_assert (chk_size == size && chk_time == time + && chk_nonspec_time == nonspec_time + && chk_hints == hints); + } + } + else + { + if (e->entry.ctx.exists_p ()) + node_context_cache_miss++; + else + node_context_cache_clear++; + e->entry.ctx.release (true); + e->entry.ctx = ctx; + ctx.estimate_size_and_time (&size, &min_size, + &time, &nonspec_time, &hints); + e->entry.size = size; + e->entry.time = time; + e->entry.nonspec_time = nonspec_time; + e->entry.hints = hints; + e->entry.ctx.duplicate_from (ctx); + } + } + else + ctx.estimate_size_and_time (&size, &min_size, + &time, &nonspec_time, &hints); /* When we have profile feedback, we can quite safely identify hot edges and for those we disable size limits. Don't do that when @@ -147,21 +253,21 @@ do_estimate_edge_time (struct cgraph_edg may hurt optimization of the caller's hot path. */ if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () && (edge->count.ipa ().apply_scale (2, 1) - > (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to->count.ipa () + > (edge->caller->inlined_to + ? edge->caller->inlined_to->count.ipa () : edge->caller->count.ipa ()))) hints |= INLINE_HINT_known_hot; - known_vals.release (); - known_contexts.release (); - known_aggs.release (); + ctx.release (); gcc_checking_assert (size >= 0); gcc_checking_assert (time >= 0); /* When caching, update the cache entry. */ if (edge_growth_cache != NULL) { - ipa_fn_summaries->get_create (edge->callee)->min_size = min_size; + if (min_size >= 0) + ipa_fn_summaries->get (edge->callee->function_symbol ())->min_size + = min_size; edge_growth_cache_entry *entry = edge_growth_cache->get_create (edge); entry->time = time; @@ -174,6 +280,14 @@ do_estimate_edge_time (struct cgraph_edg return time; } +/* Reset cache for NODE. + This must be done each time NODE body is modified. 
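
Two details of the caching logic above are worth restating: with flag_checking, a cache hit is re-verified against a fresh computation (skipped when IPA profile counts are in play), and reset_node_cache below must drop a node's entry whenever its body changes, or later lookups would serve stale numbers. Both habits in one condensed sketch (illustrative names, same shape as the cache model earlier):

#include <cassert>
#include <unordered_map>

struct entry { long key; long value; bool valid; };

static std::unordered_map<int, entry> cache;
static const bool checking = true;   /* flag_checking stand-in */

static long
compute (long key)
{
  return key * key;
}

static long
cached_estimate (int node, long key)
{
  entry &e = cache[node];
  if (e.valid && e.key == key)
    {
      /* Belt and braces: a hit must agree with a recomputation.  */
      if (checking)
        assert (compute (key) == e.value);
      return e.value;
    }
  e = { key, compute (key), true };
  return e.value;
}

/* reset_node_cache analogue: run whenever NODE's body changes.  */
static void
reset_node (int node)
{
  cache.erase (node);
}

int
main ()
{
  cached_estimate (3, 5);
  reset_node (3);            /* body changed: forget the estimate */
  return cached_estimate (3, 5) == 25 ? 0 : 1;
}
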
*/ +void +reset_node_cache (struct cgraph_node *node) +{ + if (node_context_cache) + node_context_cache->remove (node); +} /* Return estimated callee growth after inlining EDGE. Only to be called via estimate_edge_size. */ @@ -186,7 +300,7 @@ do_estimate_edge_size (struct cgraph_edg clause_t clause, nonspec_clause; vec known_vals; vec known_contexts; - vec known_aggs; + vec known_aggs; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -206,12 +320,10 @@ do_estimate_edge_size (struct cgraph_edg &clause, &nonspec_clause, &known_vals, &known_contexts, &known_aggs); - estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, &size, NULL, NULL, - NULL, NULL, vNULL); - known_vals.release (); - known_contexts.release (); - known_aggs.release (); + ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, + known_contexts, known_aggs, vNULL); + ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); + ctx.release (); return size; } @@ -227,7 +339,7 @@ do_estimate_edge_hints (struct cgraph_ed clause_t clause, nonspec_clause; vec known_vals; vec known_contexts; - vec known_aggs; + vec known_aggs; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -247,12 +359,10 @@ do_estimate_edge_hints (struct cgraph_ed &clause, &nonspec_clause, &known_vals, &known_contexts, &known_aggs); - estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, NULL, NULL, - NULL, NULL, &hints, vNULL); - known_vals.release (); - known_contexts.release (); - known_aggs.release (); + ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, + known_contexts, known_aggs, vNULL); + ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); + ctx.release (); hints |= simple_edge_hints (edge); return hints; } @@ -264,8 +374,8 @@ int estimate_size_after_inlining (struct cgraph_node *node, struct cgraph_edge *edge) { - struct ipa_call_summary *es = ipa_call_summaries->get (edge); - ipa_fn_summary *s = ipa_fn_summaries->get (node); + class ipa_call_summary *es = ipa_call_summaries->get (edge); + ipa_size_summary *s = ipa_size_summaries->get (node); if (!es->predicate || *es->predicate != false) { int size = s->size + estimate_edge_growth (edge); @@ -321,7 +431,7 @@ int estimate_growth (struct cgraph_node *node) { struct growth_data d = { node, false, false, 0 }; - struct ipa_fn_summary *info = ipa_fn_summaries->get (node); + class ipa_size_summary *info = ipa_size_summaries->get (node); node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true); @@ -396,7 +506,7 @@ growth_likely_positive (struct cgraph_no || node->address_taken) return true; - max_callers = ipa_fn_summaries->get (node)->size * 4 / edge_growth + 2; + max_callers = ipa_size_summaries->get (node)->size * 4 / edge_growth + 2; for (e = node->callers; e; e = e->next_caller) { diff -Nurp a/gcc/ipa-inline.c b/gcc/ipa-inline.c --- a/gcc/ipa-inline.c 2020-04-30 15:14:04.652000000 +0800 +++ b/gcc/ipa-inline.c 2020-04-30 15:14:56.684000000 +0800 @@ -150,8 +150,7 @@ caller_growth_limits (struct cgraph_edge int newsize; int limit = 0; HOST_WIDE_INT stack_size_limit = 0, inlined_stack; - ipa_fn_summary *info, *what_info; - ipa_fn_summary *outer_info = ipa_fn_summaries->get (to); + ipa_size_summary *outer_info = ipa_size_summaries->get (to); /* Look for function e->caller is inlined to. While doing so work out the largest function body on the way. 
As @@ -163,28 +162,29 @@ caller_growth_limits (struct cgraph_edge too much in order to prevent compiler from exploding". */ while (true) { - info = ipa_fn_summaries->get (to); - if (limit < info->self_size) - limit = info->self_size; - if (stack_size_limit < info->estimated_self_stack_size) - stack_size_limit = info->estimated_self_stack_size; - if (to->global.inlined_to) + ipa_size_summary *size_info = ipa_size_summaries->get (to); + if (limit < size_info->self_size) + limit = size_info->self_size; + if (stack_size_limit < size_info->estimated_self_stack_size) + stack_size_limit = size_info->estimated_self_stack_size; + if (to->inlined_to) to = to->callers->caller; else break; } - what_info = ipa_fn_summaries->get (what); + ipa_fn_summary *what_info = ipa_fn_summaries->get (what); + ipa_size_summary *what_size_info = ipa_size_summaries->get (what); - if (limit < what_info->self_size) - limit = what_info->self_size; + if (limit < what_size_info->self_size) + limit = what_size_info->self_size; limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; /* Check the size after inlining against the function limits. But allow the function to shrink if it went over the limits by forced inlining. */ newsize = estimate_size_after_inlining (to, e); - if (newsize >= info->size + if (newsize >= ipa_size_summaries->get (what)->size && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) && newsize > limit) { @@ -203,7 +203,7 @@ caller_growth_limits (struct cgraph_edge stack_size_limit += ((gcov_type)stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100); - inlined_stack = (outer_info->stack_frame_offset + inlined_stack = (ipa_get_stack_frame_offset (to) + outer_info->estimated_self_stack_size + what_info->estimated_stack_size); /* Check new stack consumption with stack consumption at the place @@ -213,7 +213,7 @@ caller_growth_limits (struct cgraph_edge inline call, we can inline, too. This bit overoptimistically assume that we are good at stack packing. */ - && inlined_stack > info->estimated_stack_size + && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME)) { e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; @@ -321,8 +321,8 @@ can_inline_edge_p (struct cgraph_edge *e bool inlinable = true; enum availability avail; - cgraph_node *caller = e->caller->global.inlined_to - ? e->caller->global.inlined_to : e->caller; + cgraph_node *caller = (e->caller->inlined_to + ? e->caller->inlined_to : e->caller); cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); if (!callee->definition) @@ -414,8 +414,8 @@ can_inline_edge_by_limits_p (struct cgra bool inlinable = true; enum availability avail; - cgraph_node *caller = e->caller->global.inlined_to - ? e->caller->global.inlined_to : e->caller; + cgraph_node *caller = (e->caller->inlined_to + ? e->caller->inlined_to : e->caller); cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl); tree callee_tree @@ -687,8 +687,8 @@ inline sreal compute_uninlined_call_time (struct cgraph_edge *edge, sreal uninlined_call_time) { - cgraph_node *caller = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to + cgraph_node *caller = (edge->caller->inlined_to + ? 
edge->caller->inlined_to : edge->caller); sreal freq = edge->sreal_frequency (); @@ -708,8 +708,8 @@ inline sreal compute_inlined_call_time (struct cgraph_edge *edge, sreal time) { - cgraph_node *caller = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to + cgraph_node *caller = (edge->caller->inlined_to + ? edge->caller->inlined_to : edge->caller); sreal caller_time = ipa_fn_summaries->get (caller)->time; @@ -895,7 +895,7 @@ want_inline_self_recursive_call_p (struc reason = "--param max-inline-recursive-depth exceeded."; want_inline = false; } - else if (outer_node->global.inlined_to + else if (outer_node->inlined_to && (caller_freq = outer_node->callers->sreal_frequency ()) == 0) { reason = "caller frequency is 0"; @@ -1005,7 +1005,7 @@ want_inline_function_to_all_callers_p (s if (node->alias) return false; /* Already inlined? */ - if (node->global.inlined_to) + if (node->inlined_to) return false; /* Does it have callers? */ if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true)) @@ -1037,8 +1037,8 @@ edge_badness (struct cgraph_edge *edge, struct cgraph_node *callee = edge->callee->ultimate_alias_target (); struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee); ipa_hints hints; - cgraph_node *caller = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to + cgraph_node *caller = (edge->caller->inlined_to + ? edge->caller->inlined_to : edge->caller); growth = estimate_edge_growth (edge); @@ -1051,7 +1051,7 @@ edge_badness (struct cgraph_edge *edge, gcc_checking_assert ((edge_time * 100 - callee_info->time * 101).to_int () <= 0 || callee->count.ipa ().initialized_p ()); - gcc_checking_assert (growth <= callee_info->size); + gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size); if (dump) { @@ -1122,7 +1122,7 @@ edge_badness (struct cgraph_edge *edge, if (need_more_work) noninline_callee (); } - Withhout panilizing this case, we usually inline noninline_callee + Withhout penalizing this case, we usually inline noninline_callee into the inline_caller because overall_growth is small preventing further inlining of inline_caller. @@ -1132,7 +1132,7 @@ edge_badness (struct cgraph_edge *edge, if (growth > overall_growth /* ... and having only one caller which is not inlined ... */ && callee_info->single_caller - && !edge->caller->global.inlined_to + && !edge->caller->inlined_to /* ... and edges executed only conditionally ... */ && edge->sreal_frequency () < 1 /* ... consider case where callee is not inline but caller is ... */ @@ -1155,7 +1155,7 @@ edge_badness (struct cgraph_edge *edge, and it is not called once and. */ if (!caller_info->single_caller && overall_growth < caller_growth && caller_info->inlinable - && caller_info->size + && ipa_size_summaries->get (caller)->size < (DECL_DECLARED_INLINE_P (caller->decl) ? 
MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) { @@ -1178,7 +1178,7 @@ edge_badness (struct cgraph_edge *edge, overall_growth += 256 * 256 - 256; denominator *= overall_growth; } - denominator *= ipa_fn_summaries->get (caller)->self_size + growth; + denominator *= ipa_size_summaries->get (caller)->size + growth; badness = - numerator / denominator; @@ -1300,8 +1300,10 @@ reset_edge_caches (struct cgraph_node *n struct cgraph_node *where = node; struct ipa_ref *ref; - if (where->global.inlined_to) - where = where->global.inlined_to; + if (where->inlined_to) + where = where->inlined_to; + + reset_node_cache (where); if (edge_growth_cache != NULL) for (edge = where->callers; edge; edge = edge->next_caller) @@ -1351,7 +1353,7 @@ update_caller_keys (edge_heap_t *heap, s struct ipa_ref *ref; if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable) - || node->global.inlined_to) + || node->inlined_to) return; if (!bitmap_set_bit (updated_nodes, node->get_uid ())) return; @@ -1479,8 +1481,8 @@ recursive_inlining (struct cgraph_edge * int n = 0; node = edge->caller; - if (node->global.inlined_to) - node = node->global.inlined_to; + if (node->inlined_to) + node = node->inlined_to; if (DECL_DECLARED_INLINE_P (node->decl)) limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); @@ -1528,7 +1530,7 @@ recursive_inlining (struct cgraph_edge * depth = 1; for (cnode = curr->caller; - cnode->global.inlined_to; cnode = cnode->callers->caller) + cnode->inlined_to; cnode = cnode->callers->caller) if (node->decl == curr->callee->ultimate_alias_target ()->decl) depth++; @@ -1567,6 +1569,7 @@ recursive_inlining (struct cgraph_edge * } inline_call (curr, false, new_edges, &overall_size, true); + reset_node_cache (node); lookup_recursive_calls (node, curr->callee, &heap); n++; } @@ -1581,8 +1584,8 @@ recursive_inlining (struct cgraph_edge * dump_printf_loc (MSG_NOTE, edge->call_stmt, "\n Inlined %i times, " "body grown from size %i to %i, time %f to %f\n", n, - ipa_fn_summaries->get (master_clone)->size, - ipa_fn_summaries->get (node)->size, + ipa_size_summaries->get (master_clone)->size, + ipa_size_summaries->get (node)->size, ipa_fn_summaries->get (master_clone)->time.to_double (), ipa_fn_summaries->get (node)->time.to_double ()); @@ -1593,7 +1596,7 @@ recursive_inlining (struct cgraph_edge * node = next) { next = symtab->next_function (node); - if (node->global.inlined_to == master_clone) + if (node->inlined_to == master_clone) node->remove (); } master_clone->remove (); @@ -1707,8 +1710,8 @@ resolve_noninline_speculation (edge_heap if (edge->speculative && !speculation_useful_p (edge, false)) { struct cgraph_node *node = edge->caller; - struct cgraph_node *where = node->global.inlined_to - ? node->global.inlined_to : node; + struct cgraph_node *where = node->inlined_to + ? node->inlined_to : node; auto_bitmap updated_nodes; if (edge->count.ipa ().initialized_p ()) @@ -1749,6 +1752,16 @@ sum_callers (struct cgraph_node *node, v return false; } +/* We only propagate across edges with non-interposable callee. */ + +inline bool +ignore_edge_p (struct cgraph_edge *e) +{ + enum availability avail; + e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); + return (avail <= AVAIL_INTERPOSABLE); +} + /* We use greedy algorithm for inlining of small functions: All inline candidates are put into prioritized heap ordered in increasing badness. @@ -1776,11 +1789,11 @@ inline_small_functions (void) metrics. 
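ignore_edge_p, defined just above and passed to ipa_reduced_postorder a little further down, keeps edges to interposable callees out of the SCC computation: a definition that may be replaced at link time (avail <= AVAIL_INTERPOSABLE) must not contribute recursion-cycle information. The mechanism is simply a predicate consulted during the graph walk; a self-contained sketch of such a filtered DFS, with hypothetical node and edge types:

#include <functional>
#include <vector>

struct edge_s { int callee; bool interposable; };
struct node_s { std::vector<edge_s> callees; };

/* Depth-first reachability that honors an ignore predicate, the way
   ipa_reduced_postorder honors ignore_edge_p.  */
void
walk (const std::vector<node_s> &g, int n, std::vector<bool> &seen,
      const std::function<bool (const edge_s &)> &ignore)
{
  if (seen[n])
    return;
  seen[n] = true;
  for (const edge_s &e : g[n].callees)
    if (!ignore (e))
      walk (g, e.callee, seen, ignore);
}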
*/ max_count = profile_count::uninitialized (); - ipa_reduced_postorder (order, true, NULL); + ipa_reduced_postorder (order, true, ignore_edge_p); free (order); FOR_EACH_DEFINED_FUNCTION (node) - if (!node->global.inlined_to) + if (!node->inlined_to) { if (!node->alias && node->analyzed && (node->has_gimple_body_p () || node->thunk.thunk_p) @@ -1792,7 +1805,7 @@ inline_small_functions (void) /* Do not account external functions, they will be optimized out if not inlined. Also only count the non-cold portion of program. */ if (inline_account_function_p (node)) - initial_size += info->size; + initial_size += ipa_size_summaries->get (node)->size; info->growth = estimate_growth (node); int num_calls = 0; @@ -1808,7 +1821,8 @@ inline_small_functions (void) n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle) if (opt_for_fn (n2->decl, optimize)) { - ipa_fn_summary *info2 = ipa_fn_summaries->get (n2); + ipa_fn_summary *info2 = ipa_fn_summaries->get + (n2->inlined_to ? n2->inlined_to : n2); if (info2->scc_no) break; info2->scc_no = id; @@ -1820,8 +1834,7 @@ inline_small_functions (void) max_count = max_count.max (edge->count.ipa ()); } ipa_free_postorder_info (); - edge_growth_cache - = new call_summary (symtab, false); + initialize_growth_caches (); if (dump_file) fprintf (dump_file, @@ -1872,8 +1885,8 @@ inline_small_functions (void) } if (update) { - struct cgraph_node *where = node->global.inlined_to - ? node->global.inlined_to : node; + struct cgraph_node *where = node->inlined_to + ? node->inlined_to : node; ipa_update_overall_fn_summary (where); reset_edge_caches (where); update_caller_keys (&edge_heap, where, @@ -1902,11 +1915,10 @@ inline_small_functions (void) if (!edge->inline_failed || !edge->callee->analyzed) continue; -#if CHECKING_P /* Be sure that caches are maintained consistent. This check is affected by scaling roundoff errors when compiling for IPA this we skip it in that case. */ - if (!edge->callee->count.ipa_p () + if (flag_checking && !edge->callee->count.ipa_p () && (!max_count.initialized_p () || !max_count.nonzero_p ())) { sreal cached_badness = edge_badness (edge, false); @@ -1917,6 +1929,9 @@ inline_small_functions (void) if (edge_growth_cache != NULL) edge_growth_cache->remove (edge); + reset_node_cache (edge->caller->inlined_to + ? edge->caller->inlined_to + : edge->caller); gcc_assert (old_size_est == estimate_edge_size (edge)); gcc_assert (old_time_est == estimate_edge_time (edge)); /* FIXME: @@ -1941,9 +1956,6 @@ inline_small_functions (void) } else current_badness = edge_badness (edge, false); -#else - current_badness = edge_badness (edge, false); -#endif if (current_badness != badness) { if (edge_heap.min () && current_badness > edge_heap.min_key ()) @@ -1969,7 +1981,7 @@ inline_small_functions (void) fprintf (dump_file, "\nConsidering %s with %i size\n", callee->dump_name (), - ipa_fn_summaries->get (callee)->size); + ipa_size_summaries->get (callee)->size); fprintf (dump_file, " to be inlined into %s in %s:%i\n" " Estimated badness is %f, frequency %.2f.\n", @@ -2017,8 +2029,8 @@ inline_small_functions (void) if (edge->recursive_p ()) { where = edge->caller; - if (where->global.inlined_to) - where = where->global.inlined_to; + if (where->inlined_to) + where = where->inlined_to; if (!recursive_inlining (edge, opt_for_fn (edge->caller->decl, flag_indirect_inlining) @@ -2048,7 +2060,7 @@ inline_small_functions (void) selective. 
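Two recurring changes meet in the hunks above. First, the badness-consistency check moved from a compile-time #if CHECKING_P block to a runtime flag_checking test, and it now also calls reset_node_cache on the caller so the new per-node context cache cannot hand back a stale estimate during verification. Second, global.inlined_to becomes plain inlined_to everywhere: the series drops the one-member "global" wrapper struct and makes inlined_to a direct cgraph_node field. The ubiquitous "node->inlined_to ? node->inlined_to : node" idiom resolves an inline clone to the function body it finally lives in; as a sketch:

/* Sketch only; the real cgraph_node is far larger.  */
struct node_s
{
  node_s *inlined_to; /* non-null exactly for inline clones */

  /* The function this node's code ends up compiled into: the target
     of inlining for a clone, the node itself otherwise.  */
  node_s *host () { return inlined_to ? inlined_to : this; }
};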
*/ where = edge->caller; - while (where->global.inlined_to) + while (where->inlined_to) { if (where->decl == callee->decl) outer_node = where, depth++; @@ -2067,17 +2079,16 @@ inline_small_functions (void) else if (depth && dump_file) fprintf (dump_file, " Peeling recursion with depth %i\n", depth); - gcc_checking_assert (!callee->global.inlined_to); + gcc_checking_assert (!callee->inlined_to); inline_call (edge, true, &new_indirect_edges, &overall_size, true); - add_new_edges_to_heap (&edge_heap, new_indirect_edges); - reset_edge_caches (edge->callee); + add_new_edges_to_heap (&edge_heap, new_indirect_edges); update_callee_keys (&edge_heap, where, updated_nodes); } where = edge->caller; - if (where->global.inlined_to) - where = where->global.inlined_to; + if (where->inlined_to) + where = where->inlined_to; /* Our profitability metric can depend on local properties such as number of inlinable calls and size of the function body. @@ -2095,7 +2106,7 @@ inline_small_functions (void) if (dump_enabled_p ()) { - ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller); + ipa_fn_summary *s = ipa_fn_summaries->get (where); /* dump_printf can't handle %+i. */ char buf_net_change[100]; @@ -2106,7 +2117,9 @@ inline_small_functions (void) " Inlined %C into %C which now has time %f and " "size %i, net change of %s.\n", edge->callee, edge->caller, - s->time.to_double (), s->size, buf_net_change); + s->time.to_double (), + ipa_size_summaries->get (edge->caller)->size, + buf_net_change); } if (min_size > overall_size) { @@ -2208,8 +2221,8 @@ flatten_function (struct cgraph_node *no node->aux = NULL; if (update) - ipa_update_overall_fn_summary (node->global.inlined_to - ? node->global.inlined_to : node); + ipa_update_overall_fn_summary (node->inlined_to + ? node->inlined_to : node); } /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases. @@ -2223,7 +2236,7 @@ inline_to_all_callers_1 (struct cgraph_n int *num_calls = (int *)data; bool callee_removed = false; - while (node->callers && !node->global.inlined_to) + while (node->callers && !node->inlined_to) { struct cgraph_node *caller = node->callers->caller; @@ -2243,11 +2256,11 @@ inline_to_all_callers_1 (struct cgraph_n fprintf (dump_file, "\nInlining %s size %i.\n", ultimate->name (), - ipa_fn_summaries->get (ultimate)->size); + ipa_size_summaries->get (ultimate)->size); fprintf (dump_file, " Called once from %s %i insns.\n", node->callers->caller->name (), - ipa_fn_summaries->get (node->callers->caller)->size); + ipa_size_summaries->get (node->callers->caller)->size); } /* Remember which callers we inlined to, delaying updating the @@ -2258,7 +2271,7 @@ inline_to_all_callers_1 (struct cgraph_n fprintf (dump_file, " Inlined into %s which now has %i size\n", caller->name (), - ipa_fn_summaries->get (caller)->size); + ipa_size_summaries->get (caller)->size); if (!(*num_calls)--) { if (dump_file) @@ -2296,7 +2309,7 @@ dump_overall_stats (void) struct cgraph_node *node; FOR_EACH_DEFINED_FUNCTION (node) - if (!node->global.inlined_to + if (!node->inlined_to && !node->alias) { ipa_fn_summary *s = ipa_fn_summaries->get (node); @@ -2482,8 +2495,9 @@ ipa_inline (void) for (i = nnodes - 1, j = i; i >= 0; i--) { node = order[i]; - if (lookup_attribute ("flatten", - DECL_ATTRIBUTES (node->decl)) != NULL) + if (node->definition + && lookup_attribute ("flatten", + DECL_ATTRIBUTES (node->decl)) != NULL) order[j--] = order[i]; } @@ -2588,8 +2602,8 @@ ipa_inline (void) } if (update) { - struct cgraph_node *where = node->global.inlined_to - ? 
node->global.inlined_to : node; + struct cgraph_node *where = node->inlined_to + ? node->inlined_to : node; reset_edge_caches (where); ipa_update_overall_fn_summary (where); } diff -Nurp a/gcc/ipa-inline.h b/gcc/ipa-inline.h --- a/gcc/ipa-inline.h 2020-04-30 15:14:04.608000000 +0800 +++ b/gcc/ipa-inline.h 2020-04-30 15:14:56.608000000 +0800 @@ -47,6 +47,8 @@ bool growth_likely_positive (struct cgra int do_estimate_edge_size (struct cgraph_edge *edge); sreal do_estimate_edge_time (struct cgraph_edge *edge); ipa_hints do_estimate_edge_hints (struct cgraph_edge *edge); +void reset_node_cache (struct cgraph_node *node); +void initialize_growth_caches (); void free_growth_caches (void); /* In ipa-inline.c */ diff -Nurp a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c --- a/gcc/ipa-inline-transform.c 2020-04-30 15:14:04.568000000 +0800 +++ b/gcc/ipa-inline-transform.c 2020-04-30 15:14:56.624000000 +0800 @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. #include "function.h" #include "cfg.h" #include "basic-block.h" +#include "ipa-utils.h" int ncalls_inlined; int nfunctions_inlined; @@ -166,8 +167,8 @@ clone_inlined_nodes (struct cgraph_edge struct cgraph_node *inlining_into; struct cgraph_edge *next; - if (e->caller->global.inlined_to) - inlining_into = e->caller->global.inlined_to; + if (e->caller->inlined_to) + inlining_into = e->caller->inlined_to; else inlining_into = e->caller; @@ -193,14 +194,14 @@ clone_inlined_nodes (struct cgraph_edge For now we keep the ohter functions in the group in program until cgraph_remove_unreachable_functions gets rid of them. */ - gcc_assert (!e->callee->global.inlined_to); + gcc_assert (!e->callee->inlined_to); e->callee->remove_from_same_comdat_group (); if (e->callee->definition && inline_account_function_p (e->callee)) { gcc_assert (!e->callee->alias); if (overall_size) - *overall_size -= ipa_fn_summaries->get (e->callee)->size; + *overall_size -= ipa_size_summaries->get (e->callee)->size; nfunctions_inlined++; } duplicate = false; @@ -226,7 +227,7 @@ clone_inlined_nodes (struct cgraph_edge else e->callee->remove_from_same_comdat_group (); - e->callee->global.inlined_to = inlining_into; + e->callee->inlined_to = inlining_into; /* Recursively clone all bodies. */ for (e = e->callee->callees; e; e = next) @@ -310,20 +311,24 @@ inline_call (struct cgraph_edge *e, bool /* Don't inline inlined edges. */ gcc_assert (e->inline_failed); /* Don't even think of inlining inline clone. */ - gcc_assert (!callee->global.inlined_to); + gcc_assert (!callee->inlined_to); to = e->caller; - if (to->global.inlined_to) - to = to->global.inlined_to; + if (to->inlined_to) + to = to->inlined_to; if (to->thunk.thunk_p) { struct cgraph_node *target = to->callees->callee; + thunk_expansion = true; + symtab->call_cgraph_removal_hooks (to); if (in_lto_p) to->get_untransformed_body (); to->expand_thunk (false, true); /* When thunk is instrumented we may have multiple callees. 
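In inline_call, thunk expansion (the hunk here, continuing below) is now bracketed: a thunk_expansion flag is raised, the cgraph removal hooks run before expand_thunk rewrites the thunk into a real body, and the insertion hooks run afterwards, so summary machinery rebuilds its data for the fresh body instead of updating stale records. A sketch of that suspend-observers-while-mutating shape, with invented hook plumbing:

#include <functional>
#include <vector>

struct symtab_s
{
  std::vector<std::function<void (int)>> removal_hooks, insertion_hooks;
  void call_removal (int uid) { for (auto &h : removal_hooks) h (uid); }
  void call_insertion (int uid) { for (auto &h : insertion_hooks) h (uid); }
};

static bool thunk_expansion_flag; /* mirrors the new thunk_expansion global */

void
expand_thunk_guarded (symtab_s &symtab, int uid)
{
  thunk_expansion_flag = true;
  symtab.call_removal (uid);   /* observers drop their per-node data */
  /* ... rewrite the thunk into a real function body here ... */
  symtab.call_insertion (uid); /* observers recompute for the new body */
  thunk_expansion_flag = false;
}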
*/ for (e = to->callees; e && e->callee != target; e = e->next_callee) ; + symtab->call_cgraph_insertion_hooks (to); + thunk_expansion = false; gcc_assert (e); } @@ -442,9 +447,9 @@ inline_call (struct cgraph_edge *e, bool clone_inlined_nodes (e, true, update_original, overall_size); - gcc_assert (curr->callee->global.inlined_to == to); + gcc_assert (curr->callee->inlined_to == to); - old_size = ipa_fn_summaries->get (to)->size; + old_size = ipa_size_summaries->get (to)->size; ipa_merge_fn_summary_after_inlining (e); if (e->in_polymorphic_cdtor) mark_all_inlined_calls_cdtor (e->callee); @@ -458,8 +463,8 @@ inline_call (struct cgraph_edge *e, bool work for further inlining into this function. Before inlining the function we inlined to again we expect the caller to update the overall summary. */ - ipa_fn_summaries->get (to)->size += estimated_growth; - new_size = ipa_fn_summaries->get (to)->size; + ipa_size_summaries->get (to)->size += estimated_growth; + new_size = ipa_size_summaries->get (to)->size; if (callee->calls_comdat_local) to->calls_comdat_local = true; diff -Nurp a/gcc/ipa-predicate.c b/gcc/ipa-predicate.c --- a/gcc/ipa-predicate.c 2020-04-30 15:14:04.620000000 +0800 +++ b/gcc/ipa-predicate.c 2020-04-30 15:14:56.620000000 +0800 @@ -33,9 +33,36 @@ along with GCC; see the file COPYING3. #include "fold-const.h" #include "tree-pretty-print.h" #include "gimple.h" +#include "gimplify.h" #include "data-streamer.h" +/* Check whether two set of operations have same effects. */ +static bool +expr_eval_ops_equal_p (expr_eval_ops ops1, expr_eval_ops ops2) +{ + if (ops1) + { + if (!ops2 || ops1->length () != ops2->length ()) + return false; + + for (unsigned i = 0; i < ops1->length (); i++) + { + expr_eval_op &op1 = (*ops1)[i]; + expr_eval_op &op2 = (*ops2)[i]; + + if (op1.code != op2.code + || op1.index != op2.index + || !vrp_operand_equal_p (op1.val[0], op2.val[0]) + || !vrp_operand_equal_p (op1.val[1], op2.val[1]) + || !types_compatible_p (op1.type, op2.type)) + return false; + } + return true; + } + return !ops2; +} + /* Add clause CLAUSE into the predicate P. When CONDITIONS is NULL do not perform checking whether NEW_CLAUSE is obviously true. This is useful only when NEW_CLAUSE is known to be @@ -110,14 +137,16 @@ predicate::add_clause (conditions condit for (c2 = c1 + 1; c2 < num_conditions; c2++) if (new_clause & (1 << c2)) { - condition *cc1 = - &(*conditions)[c1 - predicate::first_dynamic_condition]; condition *cc2 = &(*conditions)[c2 - predicate::first_dynamic_condition]; if (cc1->operand_num == cc2->operand_num - && cc1->val == cc2->val + && vrp_operand_equal_p (cc1->val, cc2->val) && cc2->code != is_not_constant - && cc2->code != predicate::changed + && cc2->code != changed + && expr_eval_ops_equal_p (cc1->param_ops, cc2->param_ops) + && cc2->agg_contents == cc1->agg_contents + && cc2->by_ref == cc1->by_ref + && types_compatible_p (cc2->type, cc1->type) && cc1->code == invert_tree_comparison (cc2->code, HONOR_NANS (cc1->val))) return; @@ -300,6 +329,83 @@ dump_condition (FILE *f, conditions cond if (c->agg_contents) fprintf (f, "[%soffset: " HOST_WIDE_INT_PRINT_DEC "]", c->by_ref ? 
"ref " : "", c->offset); + + for (unsigned i = 0; i < vec_safe_length (c->param_ops); i++) + { + expr_eval_op &op = (*(c->param_ops))[i]; + const char *op_name = op_symbol_code (op.code); + + if (op_name == op_symbol_code (ERROR_MARK)) + op_name = get_tree_code_name (op.code); + + fprintf (f, ",("); + + if (!op.val[0]) + { + switch (op.code) + { + case FLOAT_EXPR: + case FIX_TRUNC_EXPR: + case FIXED_CONVERT_EXPR: + case VIEW_CONVERT_EXPR: + CASE_CONVERT: + if (op.code == VIEW_CONVERT_EXPR) + fprintf (f, "VCE"); + fprintf (f, "("); + print_generic_expr (f, op.type); + fprintf (f, ")" ); + break; + + default: + fprintf (f, "%s", op_name); + } + fprintf (f, " #"); + } + else if (!op.val[1]) + { + if (op.index) + { + print_generic_expr (f, op.val[0]); + fprintf (f, " %s #", op_name); + } + else + { + fprintf (f, "# %s ", op_name); + print_generic_expr (f, op.val[0]); + } + } + else + { + fprintf (f, "%s ", op_name); + switch (op.index) + { + case 0: + fprintf (f, "#, "); + print_generic_expr (f, op.val[0]); + fprintf (f, ", "); + print_generic_expr (f, op.val[1]); + break; + + case 1: + print_generic_expr (f, op.val[0]); + fprintf (f, ", #, "); + print_generic_expr (f, op.val[1]); + break; + + case 2: + print_generic_expr (f, op.val[0]); + fprintf (f, ", "); + print_generic_expr (f, op.val[1]); + fprintf (f, ", #"); + break; + + default: + fprintf (f, "*, *, *"); + } + } + fprintf (f, ")"); + } + if (c->code == predicate::is_not_constant) { fprintf (f, " not constant"); @@ -398,8 +504,9 @@ predicate::remap_after_duplication (clau for other purposes). */ predicate -predicate::remap_after_inlining (struct ipa_fn_summary *info, - struct ipa_fn_summary *callee_info, +predicate::remap_after_inlining (class ipa_fn_summary *info, + class ipa_node_params *params_summary, + class ipa_fn_summary *callee_info, vec operand_map, vec offset_map, clause_t possible_truths, @@ -460,10 +567,10 @@ predicate::remap_after_inlining (struct ap.offset = c->offset + offset_delta; ap.agg_contents = c->agg_contents; ap.by_ref = c->by_ref; - cond_predicate = add_condition (info, + cond_predicate = add_condition (info, params_summary, operand_map[c->operand_num], - c->size, &ap, c->code, - c->val); + c->type, &ap, c->code, + c->val, c->param_ops); } } /* Fixed conditions remains same, construct single @@ -483,7 +590,7 @@ predicate::remap_after_inlining (struct /* Read predicate from IB. */ void -predicate::stream_in (struct lto_input_block *ib) +predicate::stream_in (class lto_input_block *ib) { clause_t clause; int k = 0; @@ -516,21 +623,28 @@ predicate::stream_out (struct output_blo } -/* Add condition to condition list SUMMARY. OPERAND_NUM, SIZE, CODE and VAL - correspond to fields of condition structure. AGGPOS describes whether the - used operand is loaded from an aggregate and where in the aggregate it is. - It can be NULL, which means this not a load from an aggregate. */ +/* Add condition to condition list SUMMARY. OPERAND_NUM, TYPE, CODE, VAL and + PARAM_OPS correspond to fields of condition structure. AGGPOS describes + whether the used operand is loaded from an aggregate and where in the + aggregate it is. It can be NULL, which means this not a load from an + aggregate. 
*/ predicate -add_condition (struct ipa_fn_summary *summary, int operand_num, - HOST_WIDE_INT size, struct agg_position_info *aggpos, - enum tree_code code, tree val) +add_condition (class ipa_fn_summary *summary, + class ipa_node_params *params_summary, + int operand_num, + tree type, struct agg_position_info *aggpos, + enum tree_code code, tree val, expr_eval_ops param_ops) { - int i; + int i, j; struct condition *c; struct condition new_cond; HOST_WIDE_INT offset; bool agg_contents, by_ref; + expr_eval_op *op; + + if (params_summary) + ipa_set_param_used_by_ipa_predicates (params_summary, operand_num, true); if (aggpos) { @@ -549,10 +663,11 @@ add_condition (struct ipa_fn_summary *su for (i = 0; vec_safe_iterate (summary->conds, i, &c); i++) { if (c->operand_num == operand_num - && c->size == size && c->code == code - && c->val == val + && types_compatible_p (c->type, type) + && vrp_operand_equal_p (c->val, val) && c->agg_contents == agg_contents + && expr_eval_ops_equal_p (c->param_ops, param_ops) && (!agg_contents || (c->offset == offset && c->by_ref == by_ref))) return predicate::predicate_testing_cond (i); } @@ -562,11 +677,21 @@ add_condition (struct ipa_fn_summary *su new_cond.operand_num = operand_num; new_cond.code = code; - new_cond.val = val; + new_cond.type = unshare_expr_without_location (type); + new_cond.val = val ? unshare_expr_without_location (val) : val; new_cond.agg_contents = agg_contents; new_cond.by_ref = by_ref; new_cond.offset = offset; - new_cond.size = size; + new_cond.param_ops = vec_safe_copy (param_ops); + + for (j = 0; vec_safe_iterate (new_cond.param_ops, j, &op); j++) + { + if (op->val[0]) + op->val[0] = unshare_expr_without_location (op->val[0]); + if (op->val[1]) + op->val[1] = unshare_expr_without_location (op->val[1]); + } + vec_safe_push (summary->conds, new_cond); return predicate::predicate_testing_cond (i); diff -Nurp a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h --- a/gcc/ipa-predicate.h 2020-04-30 15:14:04.612000000 +0800 +++ b/gcc/ipa-predicate.h 2020-04-30 15:14:56.620000000 +0800 @@ -22,16 +22,36 @@ along with GCC; see the file COPYING3. inlined into (i.e. known constant values of function parameters. Conditions that are interesting for function body are collected into CONDS - vector. They are of simple for function_param OP VAL, where VAL is - IPA invariant. The conditions are then referred by predicates. */ + vector. They are of simple as kind of a mathematical transformation on + function parameter, T(function_param), in which the parameter occurs only + once, and other operands are IPA invariant. The conditions are then + referred by predicates. */ + + +/* A simplified representation of tree node, for unary, binary and ternary + operation. Computations on parameter are decomposed to a series of this + kind of structure. */ +struct GTY(()) expr_eval_op +{ + /* Result type of expression. */ + tree type; + /* Constant operands in expression, there are at most two. */ + tree val[2]; + /* Index of parameter operand in expression. */ + unsigned index : 2; + /* Operation code of expression. */ + ENUM_BITFIELD(tree_code) code : 16; +}; + +typedef vec *expr_eval_ops; struct GTY(()) condition { /* If agg_contents is set, this is the offset from which the used data was loaded. */ HOST_WIDE_INT offset; - /* Size of the access reading the data (or the PARM_DECL SSA_NAME). */ - HOST_WIDE_INT size; + /* Type of the access reading the data (or the PARM_DECL SSA_NAME). 
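add_condition, rewritten above, now matches an incoming condition against the existing ones structurally: the type (types_compatible_p), the value (vrp_operand_equal_p), the aggregate position and the whole operation chain (expr_eval_ops_equal_p) all have to agree, replacing the old size-plus-pointer-identity test; and everything stored is first unshared with unshare_expr_without_location so the summary never aliases trees of the function body. Stripped of the tree details, it is an intern-or-append over a vector:

#include <vector>

struct cond_s { int operand_num; int code; long val; };

/* Return the index of an equivalent existing condition, else append.
   add_condition performs the same search over summary->conds.  */
int
intern_condition (std::vector<cond_s> &conds, const cond_s &c)
{
  for (size_t i = 0; i < conds.size (); i++)
    if (conds[i].operand_num == c.operand_num
        && conds[i].code == c.code
        && conds[i].val == c.val)
      return (int) i;
  conds.push_back (c);
  return (int) conds.size () - 1;
}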
*/ + tree type; tree val; int operand_num; ENUM_BITFIELD(tree_code) code : 16; @@ -41,6 +61,9 @@ struct GTY(()) condition /* If agg_contents is set, this differentiates between loads from data passed by reference and by value. */ unsigned by_ref : 1; + /* A set of sequential operations on the parameter, which can be seen as + a mathmatical function on the parameter. */ + expr_eval_ops param_ops; }; /* Information kept about parameter of call site. */ @@ -54,6 +77,14 @@ struct inline_param_summary Value 0 is reserved for compile time invariants. */ int change_prob; + bool equal_to (const inline_param_summary &other) const + { + return change_prob == other.change_prob; + } + bool useless_p (void) const + { + return change_prob == REG_BR_PROB_BASE; + } }; typedef vec *conditions; @@ -205,11 +236,12 @@ public: predicate remap_after_duplication (clause_t); /* Return predicate equal to THIS after inlining. */ - predicate remap_after_inlining (struct ipa_fn_summary *, - struct ipa_fn_summary *, + predicate remap_after_inlining (class ipa_fn_summary *, + class ipa_node_params *params_summary, + class ipa_fn_summary *, vec, vec, clause_t, const predicate &); - void stream_in (struct lto_input_block *); + void stream_in (class lto_input_block *); void stream_out (struct output_block *); private: @@ -227,6 +259,9 @@ private: }; void dump_condition (FILE *f, conditions conditions, int cond); -predicate add_condition (struct ipa_fn_summary *summary, int operand_num, - HOST_WIDE_INT size, struct agg_position_info *aggpos, - enum tree_code code, tree val); +predicate add_condition (class ipa_fn_summary *summary, + class ipa_node_params *params_summary, + int operand_num, + tree type, struct agg_position_info *aggpos, + enum tree_code code, tree val, + expr_eval_ops param_ops = NULL); diff -Nurp a/gcc/ipa-profile.c b/gcc/ipa-profile.c --- a/gcc/ipa-profile.c 2020-04-30 15:14:04.632000000 +0800 +++ b/gcc/ipa-profile.c 2020-04-30 15:14:56.652000000 +0800 @@ -326,8 +326,8 @@ ipa_propagate_frequency_1 (struct cgraph if (profile_info && !(edge->callee->count.ipa () == profile_count::zero ()) && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED - || (edge->caller->global.inlined_to - && edge->caller->global.inlined_to->frequency + || (edge->caller->inlined_to + && edge->caller->inlined_to->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED))) d->maybe_unlikely_executed = false; if (edge->count.ipa ().initialized_p () @@ -477,6 +477,29 @@ ipa_propagate_frequency (struct cgraph_n return changed; } +/* Check that number of arguments of N agrees with E. + Be conservative when summaries are not present. */ + +static bool +check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) +{ + if (!ipa_node_params_sum || !ipa_edge_args_sum) + return true; + class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ()); + if (!info) + return true; + if (!info->descriptors) + return true; + ipa_edge_args *e_info = IPA_EDGE_REF (e); + if (!e) + return true; + if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info) + && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info) + || !stdarg_p (TREE_TYPE (n->decl)))) + return false; + return true; +} + /* Simple ipa profile pass propagating frequencies across the callgraph. 
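The expr_eval_op structure introduced in the ipa-predicate.h hunk above records a single operation together with its constant operands; the two-bit index says which operand slot the lone parameter occurrence occupies, and a condition's param_ops chains such records into the transformation T(function_param) that the comment describes. For a known argument the chain is evaluated as a left-to-right fold; an integer-only, binary-ops-only model of that (the real evaluator folds trees):

#include <vector>

enum op_code_s { OP_PLUS, OP_MULT };

struct eval_op_s { op_code_s code; long val; unsigned index; };

/* Feed a known argument through the recorded chain: at each step the
   running value fills the operand slot named by INDEX and the constant
   fills the other one.  */
long
evaluate_chain (long param, const std::vector<eval_op_s> &ops)
{
  long cur = param;
  for (const eval_op_s &op : ops)
    {
      long lhs = op.index == 0 ? cur : op.val;
      long rhs = op.index == 0 ? op.val : cur;
      cur = op.code == OP_PLUS ? lhs + rhs : lhs * rhs;
    }
  return cur;
}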
*/ static unsigned int @@ -600,14 +623,7 @@ ipa_profile (void) "Not speculating: target is overwritable " "and can be discarded.\n"); } - else if (ipa_node_params_sum && ipa_edge_args_sum - && (!vec_safe_is_empty - (IPA_NODE_REF (n2)->descriptors)) - && ipa_get_param_count (IPA_NODE_REF (n2)) - != ipa_get_cs_argument_count (IPA_EDGE_REF (e)) - && (ipa_get_param_count (IPA_NODE_REF (n2)) - >= ipa_get_cs_argument_count (IPA_EDGE_REF (e)) - || !stdarg_p (TREE_TYPE (n2->decl)))) + else if (!check_argument_count (n2, e)) { nmismatch++; if (dump_file) diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c --- a/gcc/ipa-prop.c 2020-04-30 15:14:04.616000000 +0800 +++ b/gcc/ipa-prop.c 2020-04-30 15:14:56.676000000 +0800 @@ -203,7 +203,7 @@ ipa_get_param_decl_index_1 (vecdescriptors, ptree); } @@ -227,8 +227,10 @@ ipa_populate_param_decls (struct cgraph_ for (parm = fnargs; parm; parm = DECL_CHAIN (parm)) { descriptors[param_num].decl_or_type = parm; - descriptors[param_num].move_cost = estimate_move_cost (TREE_TYPE (parm), - true); + unsigned int cost = estimate_move_cost (TREE_TYPE (parm), true); + descriptors[param_num].move_cost = cost; + /* Watch overflow, move_cost is a bitfield. */ + gcc_checking_assert (cost == descriptors[param_num].move_cost); param_num++; } } @@ -253,7 +255,7 @@ count_formal_params (tree fndecl) using ipa_initialize_node_params. */ void -ipa_dump_param (FILE *file, struct ipa_node_params *info, int i) +ipa_dump_param (FILE *file, class ipa_node_params *info, int i) { fprintf (file, "param #%i", i); if ((*info->descriptors)[i].decl_or_type) @@ -269,7 +271,7 @@ ipa_dump_param (FILE *file, struct ipa_n static bool ipa_alloc_node_params (struct cgraph_node *node, int param_count) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); if (!info->descriptors && param_count) { @@ -287,7 +289,7 @@ ipa_alloc_node_params (struct cgraph_nod void ipa_initialize_node_params (struct cgraph_node *node) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); if (!info->descriptors && ipa_alloc_node_params (node, count_formal_params (node->decl))) @@ -359,23 +361,50 @@ ipa_print_node_jump_functions_for_edge ( fprintf (f, " Aggregate passed by %s:\n", jump_func->agg.by_ref ? "reference" : "value"); - FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, j, item) + FOR_EACH_VEC_ELT (*jump_func->agg.items, j, item) { fprintf (f, " offset: " HOST_WIDE_INT_PRINT_DEC ", ", item->offset); - if (TYPE_P (item->value)) - fprintf (f, "clobber of " HOST_WIDE_INT_PRINT_DEC " bits", - tree_to_uhwi (TYPE_SIZE (item->value))); - else + fprintf (f, "type: "); + print_generic_expr (f, item->type); + fprintf (f, ", "); + if (item->jftype == IPA_JF_PASS_THROUGH) + fprintf (f, "PASS THROUGH: %d,", + item->value.pass_through.formal_id); + else if (item->jftype == IPA_JF_LOAD_AGG) + { + fprintf (f, "LOAD AGG: %d", + item->value.pass_through.formal_id); + fprintf (f, " [offset: " HOST_WIDE_INT_PRINT_DEC ", by %s],", + item->value.load_agg.offset, + item->value.load_agg.by_ref ? 
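The ipa_populate_param_decls hunk above stores estimate_move_cost into the descriptor's move_cost bitfield and immediately asserts the round trip, so a cost too wide for the field aborts under checking instead of being silently truncated. The idiom in isolation, with an invented field width:

#include <cassert>

struct desc_s { unsigned move_cost : 8; }; /* hypothetical width */

void
set_move_cost (desc_s &d, unsigned cost)
{
  d.move_cost = cost;
  /* If COST overflowed the bitfield, the readback differs.  */
  assert (d.move_cost == cost);
}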
"reference" + : "value"); + } + + if (item->jftype == IPA_JF_PASS_THROUGH + || item->jftype == IPA_JF_LOAD_AGG) + { + fprintf (f, " op %s", + get_tree_code_name (item->value.pass_through.operation)); + if (item->value.pass_through.operation != NOP_EXPR) + { + fprintf (f, " "); + print_generic_expr (f, item->value.pass_through.operand); + } + } + else if (item->jftype == IPA_JF_CONST) { - fprintf (f, "cst: "); - print_generic_expr (f, item->value); + fprintf (f, "CONST: "); + print_generic_expr (f, item->value.constant); } + else if (item->jftype == IPA_JF_UNKNOWN) + fprintf (f, "UNKNOWN: " HOST_WIDE_INT_PRINT_DEC " bits", + tree_to_uhwi (TYPE_SIZE (item->type))); fprintf (f, "\n"); } } - struct ipa_polymorphic_call_context *ctx + class ipa_polymorphic_call_context *ctx = ipa_get_ith_polymorhic_call_context (IPA_EDGE_REF (cs), i); if (ctx && !ctx->useless_p ()) { @@ -432,7 +461,7 @@ ipa_print_node_jump_functions (FILE *f, for (cs = node->indirect_calls; cs; cs = cs->next_callee) { - struct cgraph_indirect_call_info *ii; + class cgraph_indirect_call_info *ii; if (!ipa_edge_args_info_available_for_edge_p (cs)) continue; @@ -1059,7 +1088,7 @@ bool ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, vec *descriptors, gimple *stmt, tree op, int *index_p, - HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, + HOST_WIDE_INT *offset_p, poly_int64 *size_p, bool *by_ref_p, bool *guaranteed_unmodified) { int index; @@ -1135,6 +1164,67 @@ ipa_load_from_parm_agg (struct ipa_func_ return false; } +/* If STMT is an assignment that loads a value from a parameter declaration, + or from an aggregate passed as the parameter either by value or reference, + return the index of the parameter in ipa_node_params. Otherwise return -1. + + FBI holds gathered information about the function. INFO describes + parameters of the function, STMT is the assignment statement. If it is a + memory load from an aggregate, *OFFSET_P is filled with offset within the + aggregate, and *BY_REF_P specifies whether the aggregate is passed by + reference. */ + +static int +load_from_unmodified_param_or_agg (struct ipa_func_body_info *fbi, + class ipa_node_params *info, + gimple *stmt, + HOST_WIDE_INT *offset_p, + bool *by_ref_p) +{ + int index = load_from_unmodified_param (fbi, info->descriptors, stmt); + poly_int64 size; + + /* Load value from a parameter declaration. */ + if (index >= 0) + { + *offset_p = -1; + return index; + } + + if (!gimple_assign_load_p (stmt)) + return -1; + + tree rhs = gimple_assign_rhs1 (stmt); + + /* Skip memory reference containing VIEW_CONVERT_EXPR. */ + for (tree t = rhs; handled_component_p (t); t = TREE_OPERAND (t, 0)) + if (TREE_CODE (t) == VIEW_CONVERT_EXPR) + return -1; + + /* Skip memory reference containing bit-field. 
*/ + if (TREE_CODE (rhs) == BIT_FIELD_REF + || contains_bitfld_component_ref_p (rhs)) + return -1; + + if (!ipa_load_from_parm_agg (fbi, info->descriptors, stmt, rhs, &index, + offset_p, &size, by_ref_p)) + return -1; + + gcc_assert (!maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (rhs))), + size)); + if (!*by_ref_p) + { + tree param_type = ipa_get_type (info, index); + + if (!param_type || !AGGREGATE_TYPE_P (param_type)) + return -1; + } + else if (TREE_THIS_VOLATILE (rhs)) + return -1; + + return index; +} + /* Given that an actual argument is an SSA_NAME (given in NAME) and is a result of an assignment statement STMT, try to determine whether we are actually handling any of the following cases and construct an appropriate jump @@ -1190,7 +1280,7 @@ ipa_load_from_parm_agg (struct ipa_func_ static void compute_complex_assign_jump_func (struct ipa_func_body_info *fbi, - struct ipa_node_params *info, + class ipa_node_params *info, struct ipa_jump_func *jfunc, gcall *call, gimple *stmt, tree name, tree param_type) @@ -1346,7 +1436,7 @@ get_ancestor_addr_info (gimple *assign, static void compute_complex_ancestor_jump_func (struct ipa_func_body_info *fbi, - struct ipa_node_params *info, + class ipa_node_params *info, struct ipa_jump_func *jfunc, gcall *call, gphi *phi) { @@ -1440,11 +1530,11 @@ type_like_member_ptr_p (tree type, tree } /* If RHS is an SSA_NAME and it is defined by a simple copy assign statement, - return the rhs of its defining statement. Otherwise return RHS as it - is. */ + return the rhs of its defining statement, and this statement is stored in + *RHS_STMT. Otherwise return RHS as it is. */ static inline tree -get_ssa_def_if_simple_copy (tree rhs) +get_ssa_def_if_simple_copy (tree rhs, gimple **rhs_stmt) { while (TREE_CODE (rhs) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (rhs)) { @@ -1454,100 +1544,323 @@ get_ssa_def_if_simple_copy (tree rhs) rhs = gimple_assign_rhs1 (def_stmt); else break; + *rhs_stmt = def_stmt; } return rhs; } -/* Simple linked list, describing known contents of an aggregate beforere - call. */ +/* Simple linked list, describing contents of an aggregate before call. */ struct ipa_known_agg_contents_list { /* Offset and size of the described part of the aggregate. */ HOST_WIDE_INT offset, size; - /* Known constant value or NULL if the contents is known to be unknown. */ - tree constant; + + /* Type of the described part of the aggregate. */ + tree type; + + /* Known constant value or jump function data describing contents. */ + struct ipa_load_agg_data value; + /* Pointer to the next structure in the list. */ struct ipa_known_agg_contents_list *next; }; -/* Find the proper place in linked list of ipa_known_agg_contents_list - structures where to put a new one with the given LHS_OFFSET and LHS_SIZE, - unless there is a partial overlap, in which case return NULL, or such - element is already there, in which case set *ALREADY_THERE to true. */ - -static struct ipa_known_agg_contents_list ** -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list, - HOST_WIDE_INT lhs_offset, - HOST_WIDE_INT lhs_size, - bool *already_there) +/* Add an aggregate content item into a linked list of + ipa_known_agg_contents_list structure, in which all elements + are sorted ascendingly by offset. 
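ipa_known_agg_contents_list entries now carry the access type and a full ipa_load_agg_data value instead of a bare constant, and the list is kept sorted ascendingly by offset via add_to_agg_contents_list, whose body follows below. The insertion is the classic pointer-to-pointer walk; in miniature:

struct item_s { long offset, size; item_s *next; };

/* Splice ITEM into a list sorted by offset, as add_to_agg_contents_list
   does for the per-call contents list.  */
void
sorted_insert (item_s **plist, item_s *item)
{
  while (*plist && (*plist)->offset < item->offset)
    plist = &(*plist)->next;
  item->next = *plist;
  *plist = item;
}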
*/ + +static inline void +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist, + struct ipa_known_agg_contents_list *item) { - struct ipa_known_agg_contents_list **p = list; - while (*p && (*p)->offset < lhs_offset) + struct ipa_known_agg_contents_list *list = *plist; + + for (; list; list = list->next) { - if ((*p)->offset + (*p)->size > lhs_offset) - return NULL; - p = &(*p)->next; + if (list->offset >= item->offset) + break; + + plist = &list->next; } - if (*p && (*p)->offset < lhs_offset + lhs_size) + item->next = list; + *plist = item; +} + +/* Check whether a given aggregate content is clobbered by certain element in + a linked list of ipa_known_agg_contents_list. */ + +static inline bool +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list, + struct ipa_known_agg_contents_list *item) +{ + for (; list; list = list->next) { - if ((*p)->offset == lhs_offset && (*p)->size == lhs_size) - /* We already know this value is subsequently overwritten with - something else. */ - *already_there = true; - else - /* Otherwise this is a partial overlap which we cannot - represent. */ - return NULL; + if (list->offset >= item->offset) + return list->offset < item->offset + item->size; + + if (list->offset + list->size > item->offset) + return true; } - return p; + + return false; } /* Build aggregate jump function from LIST, assuming there are exactly - CONST_COUNT constant entries there and that th offset of the passed argument + VALUE_COUNT entries there and that offset of the passed argument is ARG_OFFSET and store it into JFUNC. */ static void build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list, - int const_count, HOST_WIDE_INT arg_offset, + int value_count, HOST_WIDE_INT arg_offset, struct ipa_jump_func *jfunc) { - vec_alloc (jfunc->agg.items, const_count); - while (list) + vec_alloc (jfunc->agg.items, value_count); + for (; list; list = list->next) + { + struct ipa_agg_jf_item item; + tree operand = list->value.pass_through.operand; + + if (list->value.pass_through.formal_id >= 0) + { + /* Content value is derived from some formal paramerter. */ + if (list->value.offset >= 0) + item.jftype = IPA_JF_LOAD_AGG; + else + item.jftype = IPA_JF_PASS_THROUGH; + + item.value.load_agg = list->value; + if (operand) + item.value.pass_through.operand + = unshare_expr_without_location (operand); + } + else if (operand) + { + /* Content value is known constant. */ + item.jftype = IPA_JF_CONST; + item.value.constant = unshare_expr_without_location (operand); + } + else + continue; + + item.type = list->type; + gcc_assert (tree_to_shwi (TYPE_SIZE (list->type)) == list->size); + + item.offset = list->offset - arg_offset; + gcc_assert ((item.offset % BITS_PER_UNIT) == 0); + + jfunc->agg.items->quick_push (item); + } +} + +/* Given an assignment statement STMT, try to collect information into + AGG_VALUE that will be used to construct jump function for RHS of the + assignment, from which content value of an aggregate part comes. + + Besides constant and simple pass-through jump functions, also try to + identify whether it matches the following pattern that can be described by + a load-value-from-aggregate jump function, which is a derivative of simple + pass-through jump function. + + foo (int *p) + { + ... + + *(q_5 + 4) = *(p_3(D) + 28) op 1; + bar (q_5); + } + + Here IPA_LOAD_AGG_DATA data structure is informative enough to describe + constant, simple pass-through and load-vale-from-aggregate. 
If value + is constant, it will be kept in field OPERAND, and field FORMAL_ID is + set to -1. For simple pass-through and load-value-from-aggregate, field + FORMAL_ID specifies the related formal parameter index, and field + OFFSET can be used to distinguish them, -1 means simple pass-through, + otherwise means load-value-from-aggregate. */ + +static void +analyze_agg_content_value (struct ipa_func_body_info *fbi, + struct ipa_load_agg_data *agg_value, + gimple *stmt) +{ + tree lhs = gimple_assign_lhs (stmt); + tree rhs1 = gimple_assign_rhs1 (stmt); + enum tree_code code; + int index = -1; + + /* Initialize jump function data for the aggregate part. */ + memset (agg_value, 0, sizeof (*agg_value)); + agg_value->pass_through.operation = NOP_EXPR; + agg_value->pass_through.formal_id = -1; + agg_value->offset = -1; + + if (AGGREGATE_TYPE_P (TREE_TYPE (lhs)) /* TODO: Support aggregate type. */ + || TREE_THIS_VOLATILE (lhs) + || TREE_CODE (lhs) == BIT_FIELD_REF + || contains_bitfld_component_ref_p (lhs)) + return; + + /* Skip SSA copies. */ + while (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) + { + if (TREE_CODE (rhs1) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (rhs1)) + break; + + stmt = SSA_NAME_DEF_STMT (rhs1); + if (!is_gimple_assign (stmt)) + return; + + rhs1 = gimple_assign_rhs1 (stmt); + } + + code = gimple_assign_rhs_code (stmt); + switch (gimple_assign_rhs_class (stmt)) { - if (list->constant) + case GIMPLE_SINGLE_RHS: + if (is_gimple_ip_invariant (rhs1)) { - struct ipa_agg_jf_item item; - item.offset = list->offset - arg_offset; - gcc_assert ((item.offset % BITS_PER_UNIT) == 0); - item.value = unshare_expr_without_location (list->constant); - jfunc->agg.items->quick_push (item); + agg_value->pass_through.operand = rhs1; + return; } - list = list->next; + code = NOP_EXPR; + break; + + case GIMPLE_UNARY_RHS: + /* NOTE: A GIMPLE_UNARY_RHS operation might not be tcc_unary + (truth_not_expr is example), GIMPLE_BINARY_RHS does not imply + tcc_binary, this subtleness is somewhat misleading. + + Since tcc_unary is widely used in IPA-CP code to check an operation + with one operand, here we only allow tc_unary operation to avoid + possible problem. Then we can use (opclass == tc_unary) or not to + distinguish unary and binary. 
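For a binary RHS, analyze_agg_content_value (its body continues below) only accepts statements where one operand is an interprocedural invariant, and it normalizes the result to the shape "param OP invariant": when the invariant comes first, comparison codes are turned around with swap_tree_comparison, and non-commutative arithmetic is rejected. That normalization in miniature, over a toy code enum:

enum code_s { C_PLUS, C_MINUS, C_LT, C_GT };

/* Returns false when "invariant OP param" cannot be rewritten as
   "param OP invariant".  */
bool
normalize (code_s &code, bool invariant_is_first)
{
  if (!invariant_is_first)
    return true;                 /* already param OP invariant */
  switch (code)
    {
    case C_LT: code = C_GT; return true; /* a < b  is  b > a */
    case C_GT: code = C_LT; return true;
    case C_PLUS: return true;    /* commutative, order irrelevant */
    default: return false;       /* e.g. invariant - param: give up */
    }
}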
*/ + if (TREE_CODE_CLASS (code) != tcc_unary || CONVERT_EXPR_CODE_P (code)) + return; + + rhs1 = get_ssa_def_if_simple_copy (rhs1, &stmt); + break; + + case GIMPLE_BINARY_RHS: + { + gimple *rhs1_stmt = stmt; + gimple *rhs2_stmt = stmt; + tree rhs2 = gimple_assign_rhs2 (stmt); + + rhs1 = get_ssa_def_if_simple_copy (rhs1, &rhs1_stmt); + rhs2 = get_ssa_def_if_simple_copy (rhs2, &rhs2_stmt); + + if (is_gimple_ip_invariant (rhs2)) + { + agg_value->pass_through.operand = rhs2; + stmt = rhs1_stmt; + } + else if (is_gimple_ip_invariant (rhs1)) + { + if (TREE_CODE_CLASS (code) == tcc_comparison) + code = swap_tree_comparison (code); + else if (!commutative_tree_code (code)) + return; + + agg_value->pass_through.operand = rhs1; + stmt = rhs2_stmt; + rhs1 = rhs2; + } + else + return; + + if (TREE_CODE_CLASS (code) != tcc_comparison + && !useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs1))) + return; + } + break; + + default: + return; + } + + if (TREE_CODE (rhs1) != SSA_NAME) + index = load_from_unmodified_param_or_agg (fbi, fbi->info, stmt, + &agg_value->offset, + &agg_value->by_ref); + else if (SSA_NAME_IS_DEFAULT_DEF (rhs1)) + index = ipa_get_param_decl_index (fbi->info, SSA_NAME_VAR (rhs1)); + + if (index >= 0) + { + if (agg_value->offset >= 0) + agg_value->type = TREE_TYPE (rhs1); + agg_value->pass_through.formal_id = index; + agg_value->pass_through.operation = code; } + else + agg_value->pass_through.operand = NULL_TREE; +} + +/* If STMT is a memory store to the object whose address is BASE, extract + information (offset, size, and value) into CONTENT, and return true, + otherwise we conservatively assume the whole object is modified with + unknown content, and return false. CHECK_REF means that access to object + is expected to be in form of MEM_REF expression. */ + +static bool +extract_mem_content (struct ipa_func_body_info *fbi, + gimple *stmt, tree base, bool check_ref, + struct ipa_known_agg_contents_list *content) +{ + HOST_WIDE_INT lhs_offset, lhs_size; + bool reverse; + + if (!is_gimple_assign (stmt)) + return false; + + tree lhs = gimple_assign_lhs (stmt); + tree lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, &lhs_size, + &reverse); + if (!lhs_base) + return false; + + if (check_ref) + { + if (TREE_CODE (lhs_base) != MEM_REF + || TREE_OPERAND (lhs_base, 0) != base + || !integer_zerop (TREE_OPERAND (lhs_base, 1))) + return false; + } + else if (lhs_base != base) + return false; + + content->offset = lhs_offset; + content->size = lhs_size; + content->type = TREE_TYPE (lhs); + content->next = NULL; + + analyze_agg_content_value (fbi, &content->value, stmt); + return true; } /* Traverse statements from CALL backwards, scanning whether an aggregate given - in ARG is filled in with constant values. ARG can either be an aggregate - expression or a pointer to an aggregate. ARG_TYPE is the type of the - aggregate. JFUNC is the jump function into which the constants are - subsequently stored. */ + in ARG is filled in constants or values that are derived from caller's + formal parameter in the way described by some kinds of jump functions. FBI + is the context of the caller function for interprocedural analysis. ARG can + either be an aggregate expression or a pointer to an aggregate. ARG_TYPE is + the type of the aggregate, JFUNC is the jump function for the aggregate. 
*/ static void -determine_locally_known_aggregate_parts (gcall *call, tree arg, - tree arg_type, - struct ipa_jump_func *jfunc) -{ - struct ipa_known_agg_contents_list *list = NULL; - int item_count = 0, const_count = 0; +determine_known_aggregate_parts (struct ipa_func_body_info *fbi, + gcall *call, tree arg, + tree arg_type, + struct ipa_jump_func *jfunc) +{ + struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL; + bitmap visited = NULL; + int item_count = 0, value_count = 0; HOST_WIDE_INT arg_offset, arg_size; - gimple_stmt_iterator gsi; tree arg_base; bool check_ref, by_ref; ao_ref r; - if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) + if ( PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) return; /* The function operates in three stages. First, we prepare check_ref, r, @@ -1606,91 +1919,73 @@ determine_locally_known_aggregate_parts ao_ref_init (&r, arg); } - /* Second stage walks back the BB, looks at individual statements and as long - as it is confident of how the statements affect contents of the - aggregates, it builds a sorted linked list of ipa_agg_jf_list structures - describing it. */ - gsi = gsi_for_stmt (call); - gsi_prev (&gsi); - for (; !gsi_end_p (gsi); gsi_prev (&gsi)) - { - struct ipa_known_agg_contents_list *n, **p; - gimple *stmt = gsi_stmt (gsi); - HOST_WIDE_INT lhs_offset, lhs_size; - tree lhs, rhs, lhs_base; - bool reverse; - - if (!stmt_may_clobber_ref_p_1 (stmt, &r)) - continue; - if (!gimple_assign_single_p (stmt)) - break; - - lhs = gimple_assign_lhs (stmt); - rhs = gimple_assign_rhs1 (stmt); - if (!is_gimple_reg_type (TREE_TYPE (rhs)) - || TREE_CODE (lhs) == BIT_FIELD_REF - || contains_bitfld_component_ref_p (lhs)) - break; + /* Second stage traverses virtual SSA web backwards starting from the call + statement, only looks at individual dominating virtual operand (its + definition dominates the call), as long as it is confident that content + of the aggregate is affected by definition of the virtual operand, it + builds a sorted linked list of ipa_agg_jf_list describing that. */ - lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, - &lhs_size, &reverse); - if (!lhs_base) - break; + for (tree dom_vuse = gimple_vuse (call); dom_vuse;) + { + gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse); - if (check_ref) + if (gimple_code (stmt) == GIMPLE_PHI) { - if (TREE_CODE (lhs_base) != MEM_REF - || TREE_OPERAND (lhs_base, 0) != arg_base - || !integer_zerop (TREE_OPERAND (lhs_base, 1))) - break; + dom_vuse = get_continuation_for_phi (stmt, &r, true, + fbi->aa_walk_budget, + &visited, false, NULL, NULL); + continue; } - else if (lhs_base != arg_base) + + if (stmt_may_clobber_ref_p_1 (stmt, &r)) { - if (DECL_P (lhs_base)) - continue; - else + struct ipa_known_agg_contents_list *content + = XALLOCA (struct ipa_known_agg_contents_list); + + if (!extract_mem_content (fbi, stmt, arg_base, check_ref, content)) break; - } - bool already_there = false; - p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size, - &already_there); - if (!p) - break; - if (already_there) - continue; + /* Now we get a dominating virtual operand, and need to check + whether its value is clobbered any other dominating one. 
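The rewritten second stage no longer scans the statements of a single basic block backwards; it walks the virtual-SSA use-def chain from the call's gimple_vuse, skipping PHIs through get_continuation_for_phi, and gives up once the aa_walk_budget or the PARAM_IPA_MAX_AGG_ITEMS-derived item limits (visible in the hunks below) are exhausted. Reduced to its skeleton, with a toy statement type:

#include <vector>

struct vstmt_s { vstmt_s *prev_vdef; bool clobbers_arg; };

/* Follow dominating virtual definitions from the call backwards,
   collecting stores that may affect the argument, bounded by BUDGET.  */
std::vector<vstmt_s *>
collect_dominating_stores (vstmt_s *vuse, int budget)
{
  std::vector<vstmt_s *> stores;
  for (; vuse && budget-- > 0; vuse = vuse->prev_vdef)
    if (vuse->clobbers_arg)
      stores.push_back (vuse);
  return stores;
}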
*/ + if ((content->value.pass_through.formal_id >= 0 + || content->value.pass_through.operand) + && !clobber_by_agg_contents_list_p (all_list, content)) + { + struct ipa_known_agg_contents_list *copy + = XALLOCA (struct ipa_known_agg_contents_list); - rhs = get_ssa_def_if_simple_copy (rhs); - n = XALLOCA (struct ipa_known_agg_contents_list); - n->size = lhs_size; - n->offset = lhs_offset; - if (is_gimple_ip_invariant (rhs)) - { - n->constant = rhs; - const_count++; + /* Add to the list consisting of only dominating virtual + operands, whose definitions can finally reach the call. */ + add_to_agg_contents_list (&list, (*copy = *content, copy)); + + if (++value_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) + break; + } + + /* Add to the list consisting of all dominating virtual operands. */ + add_to_agg_contents_list (&all_list, content); + + if (++item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) + break; } - else - n->constant = NULL_TREE; - n->next = *p; - *p = n; - - item_count++; - if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) - || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) - break; - } + dom_vuse = gimple_vuse (stmt); + } + + if (visited) + BITMAP_FREE (visited); /* Third stage just goes over the list and creates an appropriate vector of - ipa_agg_jf_item structures out of it, of sourse only if there are - any known constants to begin with. */ + ipa_agg_jf_item structures out of it, of course only if there are + any meaningful items to begin with. */ - if (const_count) + if (value_count) { jfunc->agg.by_ref = by_ref; - build_agg_jump_func_from_list (list, const_count, arg_offset, jfunc); + build_agg_jump_func_from_list (list, value_count, arg_offset, jfunc); } } + /* Return the Ith param type of callee associated with call graph edge E. */ @@ -1797,7 +2092,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enu jf->m_vr = ipa_get_value_range (type, min, max); } -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a +/* Assign to JF a pointer to a value_range just like TMP but either fetch a copy from ipa_vr_hash_table or allocate a new on in GC memory. */ static void @@ -1814,8 +2109,8 @@ static void ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, struct cgraph_edge *cs) { - struct ipa_node_params *info = IPA_NODE_REF (cs->caller); - struct ipa_edge_args *args = IPA_EDGE_REF (cs); + class ipa_node_params *info = IPA_NODE_REF (cs->caller); + class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (cs); gcall *call = cs->call_stmt; int n, arg_num = gimple_call_num_args (call); bool useful_context = false; @@ -1839,7 +2134,7 @@ ipa_compute_jump_functions_for_edge (str if (flag_devirtualize && POINTER_TYPE_P (TREE_TYPE (arg))) { tree instance; - struct ipa_polymorphic_call_context context (cs->caller->decl, + class ipa_polymorphic_call_context context (cs->caller->decl, arg, cs->call_stmt, &instance); context.get_dynamic_type (instance, arg, NULL, cs->call_stmt, @@ -1978,7 +2273,7 @@ ipa_compute_jump_functions_for_edge (str || !ipa_get_jf_ancestor_agg_preserved (jfunc)) && (AGGREGATE_TYPE_P (TREE_TYPE (arg)) || POINTER_TYPE_P (param_type))) - determine_locally_known_aggregate_parts (call, arg, param_type, jfunc); + determine_known_aggregate_parts (fbi, call, arg, param_type, jfunc); } if (!useful_context) vec_free (args->polymorphic_call_contexts); @@ -2076,11 +2371,12 @@ ipa_is_ssa_with_stmt_def (tree t) /* Find the indirect call graph edge corresponding to STMT and mark it as a call to a parameter number PARAM_INDEX. 
NODE is the caller. Return the - indirect call graph edge. */ + indirect call graph edge. + If POLYMORPHIC is true record is as a destination of polymorphic call. */ static struct cgraph_edge * ipa_note_param_call (struct cgraph_node *node, int param_index, - gcall *stmt) + gcall *stmt, bool polymorphic) { struct cgraph_edge *cs; @@ -2089,6 +2385,11 @@ ipa_note_param_call (struct cgraph_node cs->indirect_info->agg_contents = 0; cs->indirect_info->member_ptr = 0; cs->indirect_info->guaranteed_unmodified = 0; + ipa_set_param_used_by_indirect_call (IPA_NODE_REF (node), + param_index, true); + if (cs->indirect_info->polymorphic || polymorphic) + ipa_set_param_used_by_polymorphic_call + (IPA_NODE_REF (node), param_index, true); return cs; } @@ -2155,7 +2456,7 @@ static void ipa_analyze_indirect_call_uses (struct ipa_func_body_info *fbi, gcall *call, tree target) { - struct ipa_node_params *info = fbi->info; + class ipa_node_params *info = fbi->info; HOST_WIDE_INT offset; bool by_ref; @@ -2164,7 +2465,7 @@ ipa_analyze_indirect_call_uses (struct i tree var = SSA_NAME_VAR (target); int index = ipa_get_param_decl_index (info, var); if (index >= 0) - ipa_note_param_call (fbi->node, index, call); + ipa_note_param_call (fbi->node, index, call, false); return; } @@ -2176,7 +2477,8 @@ ipa_analyze_indirect_call_uses (struct i gimple_assign_rhs1 (def), &index, &offset, NULL, &by_ref, &guaranteed_unmodified)) { - struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); + struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, + call, false); cs->indirect_info->offset = offset; cs->indirect_info->agg_contents = 1; cs->indirect_info->by_ref = by_ref; @@ -2277,7 +2579,8 @@ ipa_analyze_indirect_call_uses (struct i if (index >= 0 && parm_preserved_before_stmt_p (fbi, index, call, rec)) { - struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); + struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, + call, false); cs->indirect_info->offset = offset; cs->indirect_info->agg_contents = 1; cs->indirect_info->member_ptr = 1; @@ -2306,7 +2609,7 @@ ipa_analyze_virtual_call_uses (struct ip if (TREE_CODE (obj) != SSA_NAME) return; - struct ipa_node_params *info = fbi->info; + class ipa_node_params *info = fbi->info; if (SSA_NAME_IS_DEFAULT_DEF (obj)) { struct ipa_jump_func jfunc; @@ -2337,8 +2640,9 @@ ipa_analyze_virtual_call_uses (struct ip return; } - struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); - struct cgraph_indirect_call_info *ii = cs->indirect_info; + struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, + call, true); + class cgraph_indirect_call_info *ii = cs->indirect_info; ii->offset = anc_offset; ii->otr_token = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (target)); ii->otr_type = obj_type_ref_class (target); @@ -2410,7 +2714,7 @@ ipa_analyze_stmt_uses (struct ipa_func_b static bool visit_ref_for_mod_analysis (gimple *, tree op, tree, void *data) { - struct ipa_node_params *info = (struct ipa_node_params *) data; + class ipa_node_params *info = (class ipa_node_params *) data; op = get_base_address (op); if (op @@ -2458,7 +2762,7 @@ ipa_analyze_params_uses_in_bb (struct ip static void ipa_analyze_controlled_uses (struct cgraph_node *node) { - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); for (int i = 0; i < ipa_get_param_count (info); i++) { @@ -2550,11 +2854,11 @@ void ipa_analyze_node (struct cgraph_node *node) { struct ipa_func_body_info fbi; - struct ipa_node_params *info; + 
class ipa_node_params *info; ipa_check_create_node_params (); ipa_check_create_edge_args (); - info = IPA_NODE_REF (node); + info = IPA_NODE_REF_GET_CREATE (node); if (info->analysis_done) return; @@ -2610,22 +2914,96 @@ static void update_jump_functions_after_inlining (struct cgraph_edge *cs, struct cgraph_edge *e) { - struct ipa_edge_args *top = IPA_EDGE_REF (cs); - struct ipa_edge_args *args = IPA_EDGE_REF (e); + class ipa_edge_args *top = IPA_EDGE_REF (cs); + class ipa_edge_args *args = IPA_EDGE_REF (e); + if (!args) + return; int count = ipa_get_cs_argument_count (args); int i; for (i = 0; i < count; i++) { struct ipa_jump_func *dst = ipa_get_ith_jump_func (args, i); - struct ipa_polymorphic_call_context *dst_ctx + class ipa_polymorphic_call_context *dst_ctx = ipa_get_ith_polymorhic_call_context (args, i); + if (dst->agg.items) + { + struct ipa_agg_jf_item *item; + int j; + + FOR_EACH_VEC_ELT (*dst->agg.items, j, item) + { + int dst_fid; + struct ipa_jump_func *src; + + if (item->jftype != IPA_JF_PASS_THROUGH + && item->jftype != IPA_JF_LOAD_AGG) + continue; + + dst_fid = item->value.pass_through.formal_id; + if (!top || dst_fid >= ipa_get_cs_argument_count (top)) + { + item->jftype = IPA_JF_UNKNOWN; + continue; + } + + item->value.pass_through.formal_id = -1; + src = ipa_get_ith_jump_func (top, dst_fid); + if (src->type == IPA_JF_CONST) + { + if (item->jftype == IPA_JF_PASS_THROUGH + && item->value.pass_through.operation == NOP_EXPR) + { + item->jftype = IPA_JF_CONST; + item->value.constant = src->value.constant.value; + continue; + } + } + else if (src->type == IPA_JF_PASS_THROUGH + && src->value.pass_through.operation == NOP_EXPR) + { + if (item->jftype == IPA_JF_PASS_THROUGH + || !item->value.load_agg.by_ref + || src->value.pass_through.agg_preserved) + item->value.pass_through.formal_id + = src->value.pass_through.formal_id; + } + else if (src->type == IPA_JF_ANCESTOR) + { + if (item->jftype == IPA_JF_PASS_THROUGH) + { + if (!src->value.ancestor.offset) + item->value.pass_through.formal_id + = src->value.ancestor.formal_id; + } + else if (src->value.ancestor.agg_preserved) + { + gcc_checking_assert (item->value.load_agg.by_ref); + + item->value.pass_through.formal_id + = src->value.ancestor.formal_id; + item->value.load_agg.offset + += src->value.ancestor.offset; + } + } + + if (item->value.pass_through.formal_id < 0) + item->jftype = IPA_JF_UNKNOWN; + } + } + + if (!top) + { + ipa_set_jf_unknown (dst); + continue; + } + if (dst->type == IPA_JF_ANCESTOR) { struct ipa_jump_func *src; int dst_fid = dst->value.ancestor.formal_id; - struct ipa_polymorphic_call_context *src_ctx + class ipa_polymorphic_call_context *src_ctx = ipa_get_ith_polymorhic_call_context (top, dst_fid); /* Variable number of arguments can cause havoc if we try to access @@ -2641,7 +3019,7 @@ update_jump_functions_after_inlining (st if (src_ctx && !src_ctx->useless_p ()) { - struct ipa_polymorphic_call_context ctx = *src_ctx; + class ipa_polymorphic_call_context ctx = *src_ctx; /* TODO: Make type preserved safe WRT contexts. */ if (!ipa_get_jf_ancestor_type_preserved (dst)) @@ -2660,8 +3038,11 @@ update_jump_functions_after_inlining (st } } - if (src->agg.items - && (dst->value.ancestor.agg_preserved || !src->agg.by_ref)) + /* Parameter and argument in ancestor jump function must be pointer + type, which means access to aggregate must be by-reference. 
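
     For example (hypothetical source with invented names, not from the
     patches), an ancestor jump function describes passing the address of a
     sub-object of what a pointer parameter points to:

       struct A { int f; };
       struct B { int pad; struct A a; };
       extern void use_a (struct A *p);

       void
       forward (struct B *b)
       {
         use_a (&b->a);   // ancestor of parameter 0 at the offset of
                          // field 'a'; any known aggregate contents
                          // necessarily travel by reference
       }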
*/ + gcc_assert (!src->agg.items || src->agg.by_ref); + + if (src->agg.items && dst->value.ancestor.agg_preserved) { struct ipa_agg_jf_item *item; int j; @@ -2705,18 +3086,18 @@ update_jump_functions_after_inlining (st /* We must check range due to calls with variable number of arguments and we cannot combine jump functions with operations. */ if (dst->value.pass_through.operation == NOP_EXPR - && (dst->value.pass_through.formal_id + && (top && dst->value.pass_through.formal_id < ipa_get_cs_argument_count (top))) { int dst_fid = dst->value.pass_through.formal_id; src = ipa_get_ith_jump_func (top, dst_fid); bool dst_agg_p = ipa_get_jf_pass_through_agg_preserved (dst); - struct ipa_polymorphic_call_context *src_ctx + class ipa_polymorphic_call_context *src_ctx = ipa_get_ith_polymorhic_call_context (top, dst_fid); if (src_ctx && !src_ctx->useless_p ()) { - struct ipa_polymorphic_call_context ctx = *src_ctx; + class ipa_polymorphic_call_context ctx = *src_ctx; /* TODO: Make type preserved safe WRT contexts. */ if (!ipa_get_jf_pass_through_type_preserved (dst)) @@ -2856,7 +3237,7 @@ ipa_make_edge_direct_to_target (struct c /* Because may-edges are not explicitely represented and vtable may be external, we may create the first reference to the object in the unit. */ - if (!callee || callee->global.inlined_to) + if (!callee || callee->inlined_to) { /* We are better to ensure we can refer to it. @@ -2909,7 +3290,7 @@ ipa_make_edge_direct_to_target (struct c /* We cannot make edges to inline clones. It is bug that someone removed the cgraph node too early. */ - gcc_assert (!callee->global.inlined_to); + gcc_assert (!callee->inlined_to); if (dump_file && !unreachable) { @@ -3059,18 +3440,19 @@ ipa_find_agg_cst_from_init (tree scalar, return find_constructor_constant_at_offset (DECL_INITIAL (scalar), offset); } -/* Retrieve value from aggregate jump function AGG or static initializer of - SCALAR (which can be NULL) for the given OFFSET or return NULL if there is - none. BY_REF specifies whether the value has to be passed by reference or - by value. If FROM_GLOBAL_CONSTANT is non-NULL, then the boolean it points - to is set to true if the value comes from an initializer of a constant. */ +/* Retrieve value from AGG, a set of known offset/value for an aggregate or + static initializer of SCALAR (which can be NULL) for the given OFFSET or + return NULL if there is none. BY_REF specifies whether the value has to be + passed by reference or by value. If FROM_GLOBAL_CONSTANT is non-NULL, then + the boolean it points to is set to true if the value comes from an + initializer of a constant. */ tree -ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, +ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, HOST_WIDE_INT offset, bool by_ref, bool *from_global_constant) { - struct ipa_agg_jf_item *item; + struct ipa_agg_value *item; int i; if (scalar) @@ -3088,7 +3470,7 @@ ipa_find_agg_cst_for_param (struct ipa_a || by_ref != agg->by_ref) return NULL; - FOR_EACH_VEC_SAFE_ELT (agg->items, i, item) + FOR_EACH_VEC_ELT (agg->items, i, item) if (item->offset == offset) { /* Currently we do not have clobber values, return NULL for them once @@ -3184,12 +3566,14 @@ try_decrement_rdesc_refcount (struct ipa pointer formal parameter described by jump function JFUNC. TARGET_TYPE is the type of the parameter to which the result of JFUNC is passed. If it can be determined, return the newly direct edge, otherwise return NULL. 
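
   A sketch of the situation this handles, with invented names and not
   taken from the patches:

     struct ops { int (*hook) (int); };
     static int impl (int v) { return v + 1; }

     static int
     dispatch (struct ops *o, int v)
     {
       return o->hook (v);   // indirect call fed by the aggregate
                             // contents of parameter 'o'
     }

     int
     entry (int v)
     {
       struct ops o = { impl };
       return dispatch (&o, v);   // the aggregate jump function carries
                                  // 'impl', so the call above can be
                                  // made direct
     }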
- NEW_ROOT_INFO is the node info that JFUNC lattices are relative to. */ + NEW_ROOT and NEW_ROOT_INFO is the node and its info that JFUNC lattices are + relative to. */ static struct cgraph_edge * try_make_edge_direct_simple_call (struct cgraph_edge *ie, struct ipa_jump_func *jfunc, tree target_type, - struct ipa_node_params *new_root_info) + struct cgraph_node *new_root, + class ipa_node_params *new_root_info) { struct cgraph_edge *cs; tree target; @@ -3198,10 +3582,14 @@ try_make_edge_direct_simple_call (struct if (agg_contents) { bool from_global_constant; - target = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, + ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, + new_root, + &jfunc->agg); + target = ipa_find_agg_cst_for_param (&agg, scalar, ie->indirect_info->offset, ie->indirect_info->by_ref, &from_global_constant); + agg.release (); if (target && !from_global_constant && !ie->indirect_info->guaranteed_unmodified) @@ -3255,12 +3643,16 @@ ipa_impossible_devirt_target (struct cgr call based on a formal parameter which is described by jump function JFUNC and if it can be determined, make it direct and return the direct edge. Otherwise, return NULL. CTX describes the polymorphic context that the - parameter the call is based on brings along with it. */ + parameter the call is based on brings along with it. NEW_ROOT and + NEW_ROOT_INFO is the node and its info that JFUNC lattices are relative + to. */ static struct cgraph_edge * try_make_edge_direct_virtual_call (struct cgraph_edge *ie, struct ipa_jump_func *jfunc, - struct ipa_polymorphic_call_context ctx) + class ipa_polymorphic_call_context ctx, + struct cgraph_node *new_root, + class ipa_node_params *new_root_info) { tree target = NULL; bool speculative = false; @@ -3278,9 +3670,13 @@ try_make_edge_direct_virtual_call (struc unsigned HOST_WIDE_INT offset; tree scalar = (jfunc->type == IPA_JF_CONST) ? ipa_get_jf_constant (jfunc) : NULL; - tree t = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, + ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, + new_root, + &jfunc->agg); + tree t = ipa_find_agg_cst_for_param (&agg, scalar, ie->indirect_info->offset, true); + agg.release (); if (t && vtable_pointer_value_to_vtable (t, &vtable, &offset)) { bool can_refer; @@ -3370,21 +3766,22 @@ update_indirect_edges_after_inlining (st struct cgraph_node *node, vec *new_edges) { - struct ipa_edge_args *top; + class ipa_edge_args *top; struct cgraph_edge *ie, *next_ie, *new_direct_edge; - struct ipa_node_params *new_root_info, *inlined_node_info; + struct cgraph_node *new_root; + class ipa_node_params *new_root_info, *inlined_node_info; bool res = false; ipa_check_create_edge_args (); top = IPA_EDGE_REF (cs); - new_root_info = IPA_NODE_REF (cs->caller->global.inlined_to - ? cs->caller->global.inlined_to - : cs->caller); + new_root = cs->caller->inlined_to + ? 
cs->caller->inlined_to : cs->caller; + new_root_info = IPA_NODE_REF (new_root); inlined_node_info = IPA_NODE_REF (cs->callee->function_symbol ()); for (ie = node->indirect_calls; ie; ie = next_ie) { - struct cgraph_indirect_call_info *ici = ie->indirect_info; + class cgraph_indirect_call_info *ici = ie->indirect_info; struct ipa_jump_func *jfunc; int param_index; cgraph_node *spec_target = NULL; @@ -3395,7 +3792,7 @@ update_indirect_edges_after_inlining (st continue; /* We must check range due to calls with variable number of arguments: */ - if (ici->param_index >= ipa_get_cs_argument_count (top)) + if (!top || ici->param_index >= ipa_get_cs_argument_count (top)) { ici->param_index = -1; continue; @@ -3418,13 +3815,16 @@ update_indirect_edges_after_inlining (st { ipa_polymorphic_call_context ctx; ctx = ipa_context_from_jfunc (new_root_info, cs, param_index, jfunc); - new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx); + new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx, + new_root, + new_root_info); } else { tree target_type = ipa_get_type (inlined_node_info, param_index); new_direct_edge = try_make_edge_direct_simple_call (ie, jfunc, target_type, + new_root, new_root_info); } @@ -3470,6 +3870,11 @@ update_indirect_edges_after_inlining (st if (ici->polymorphic && !ipa_get_jf_pass_through_type_preserved (jfunc)) ici->vptr_changed = true; + ipa_set_param_used_by_indirect_call (new_root_info, + ici->param_index, true); + if (ici->polymorphic) + ipa_set_param_used_by_polymorphic_call (new_root_info, + ici->param_index, true); } } else if (jfunc->type == IPA_JF_ANCESTOR) @@ -3485,6 +3890,11 @@ update_indirect_edges_after_inlining (st if (ici->polymorphic && !ipa_get_jf_ancestor_type_preserved (jfunc)) ici->vptr_changed = true; + ipa_set_param_used_by_indirect_call (new_root_info, + ici->param_index, true); + if (ici->polymorphic) + ipa_set_param_used_by_polymorphic_call (new_root_info, + ici->param_index, true); } } else @@ -3541,13 +3951,18 @@ combine_controlled_uses_counters (int c, static void propagate_controlled_uses (struct cgraph_edge *cs) { - struct ipa_edge_args *args = IPA_EDGE_REF (cs); - struct cgraph_node *new_root = cs->caller->global.inlined_to - ? cs->caller->global.inlined_to : cs->caller; - struct ipa_node_params *new_root_info = IPA_NODE_REF (new_root); - struct ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); + class ipa_edge_args *args = IPA_EDGE_REF (cs); + if (!args) + return; + struct cgraph_node *new_root = cs->caller->inlined_to + ? 
cs->caller->inlined_to : cs->caller; + class ipa_node_params *new_root_info = IPA_NODE_REF (new_root); + class ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); int count, i; + if (!old_root_info) + return; + count = MIN (ipa_get_cs_argument_count (args), ipa_get_param_count (old_root_info)); for (i = 0; i < count; i++) @@ -3608,9 +4023,9 @@ propagate_controlled_uses (struct cgraph gcc_checking_assert (ok); clone = cs->caller; - while (clone->global.inlined_to - && clone != rdesc->cs->caller - && IPA_NODE_REF (clone)->ipcp_orig_node) + while (clone->inlined_to + && clone->ipcp_clone + && clone != rdesc->cs->caller) { struct ipa_ref *ref; ref = clone->find_reference (n, NULL, 0); @@ -3669,6 +4084,7 @@ ipa_propagate_indirect_call_infos (struc propagate_controlled_uses (cs); changed = propagate_info_to_inlined_callees (cs, cs->callee, new_edges); + ipa_node_params_sum->remove (cs->callee); return changed; } @@ -3830,16 +4246,16 @@ ipa_edge_args_sum_t::duplicate (cgraph_e We need to find the duplicate that refers to our tree of inline clones. */ - gcc_assert (dst->caller->global.inlined_to); + gcc_assert (dst->caller->inlined_to); for (dst_rdesc = src_rdesc->next_duplicate; dst_rdesc; dst_rdesc = dst_rdesc->next_duplicate) { struct cgraph_node *top; - top = dst_rdesc->cs->caller->global.inlined_to - ? dst_rdesc->cs->caller->global.inlined_to + top = dst_rdesc->cs->caller->inlined_to + ? dst_rdesc->cs->caller->inlined_to : dst_rdesc->cs->caller; - if (dst->caller->global.inlined_to == top) + if (dst->caller->inlined_to == top) break; } gcc_assert (dst_rdesc); @@ -3849,9 +4265,9 @@ ipa_edge_args_sum_t::duplicate (cgraph_e else if (dst_jf->type == IPA_JF_PASS_THROUGH && src->caller == dst->caller) { - struct cgraph_node *inline_root = dst->caller->global.inlined_to - ? dst->caller->global.inlined_to : dst->caller; - struct ipa_node_params *root_info = IPA_NODE_REF (inline_root); + struct cgraph_node *inline_root = dst->caller->inlined_to + ? 
dst->caller->inlined_to : dst->caller; + class ipa_node_params *root_info = IPA_NODE_REF (inline_root); int idx = ipa_get_jf_pass_through_formal_id (dst_jf); int c = ipa_get_controlled_uses (root_info, idx); @@ -3995,7 +4411,7 @@ void ipa_print_node_params (FILE *f, struct cgraph_node *node) { int i, count; - struct ipa_node_params *info; + class ipa_node_params *info; if (!node->definition) return; @@ -4010,6 +4426,12 @@ ipa_print_node_params (FILE *f, struct c ipa_dump_param (f, info, i); if (ipa_is_param_used (info, i)) fprintf (f, " used"); + if (ipa_is_param_used_by_ipa_predicates (info, i)) + fprintf (f, " used_by_ipa_predicates"); + if (ipa_is_param_used_by_indirect_call (info, i)) + fprintf (f, " used_by_indirect_call"); + if (ipa_is_param_used_by_polymorphic_call (info, i)) + fprintf (f, " used_by_polymorphic_call"); c = ipa_get_controlled_uses (info, i); if (c == IPA_UNDESCRIBED_USE) fprintf (f, " undescribed_use"); @@ -4104,6 +4526,8 @@ ipa_write_jump_function (struct output_b bp_pack_value (&bp, jump_func->value.ancestor.agg_preserved, 1); streamer_write_bitpack (&bp); break; + default: + fatal_error (UNKNOWN_LOCATION, "invalid jump function in LTO stream"); } count = vec_safe_length (jump_func->agg.items); @@ -4117,8 +4541,36 @@ ipa_write_jump_function (struct output_b FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, i, item) { + stream_write_tree (ob, item->type, true); streamer_write_uhwi (ob, item->offset); - stream_write_tree (ob, item->value, true); + streamer_write_uhwi (ob, item->jftype); + switch (item->jftype) + { + case IPA_JF_UNKNOWN: + break; + case IPA_JF_CONST: + stream_write_tree (ob, item->value.constant, true); + break; + case IPA_JF_PASS_THROUGH: + case IPA_JF_LOAD_AGG: + streamer_write_uhwi (ob, item->value.pass_through.operation); + streamer_write_uhwi (ob, item->value.pass_through.formal_id); + if (TREE_CODE_CLASS (item->value.pass_through.operation) + != tcc_unary) + stream_write_tree (ob, item->value.pass_through.operand, true); + if (item->jftype == IPA_JF_LOAD_AGG) + { + stream_write_tree (ob, item->value.load_agg.type, true); + streamer_write_uhwi (ob, item->value.load_agg.offset); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, item->value.load_agg.by_ref, 1); + streamer_write_bitpack (&bp); + } + break; + default: + fatal_error (UNKNOWN_LOCATION, + "invalid jump function in LTO stream"); + } } bp = bitpack_create (ob->main_stream); @@ -4143,10 +4595,10 @@ ipa_write_jump_function (struct output_b /* Read in jump function JUMP_FUNC from IB. 
*/ static void -ipa_read_jump_function (struct lto_input_block *ib, +ipa_read_jump_function (class lto_input_block *ib, struct ipa_jump_func *jump_func, struct cgraph_edge *cs, - struct data_in *data_in, + class data_in *data_in, bool prevails) { enum jump_func_type jftype; @@ -4215,8 +4667,39 @@ ipa_read_jump_function (struct lto_input for (i = 0; i < count; i++) { struct ipa_agg_jf_item item; + item.type = stream_read_tree (ib, data_in); item.offset = streamer_read_uhwi (ib); - item.value = stream_read_tree (ib, data_in); + item.jftype = (enum jump_func_type) streamer_read_uhwi (ib); + + switch (item.jftype) + { + case IPA_JF_UNKNOWN: + break; + case IPA_JF_CONST: + item.value.constant = stream_read_tree (ib, data_in); + break; + case IPA_JF_PASS_THROUGH: + case IPA_JF_LOAD_AGG: + operation = (enum tree_code) streamer_read_uhwi (ib); + item.value.pass_through.operation = operation; + item.value.pass_through.formal_id = streamer_read_uhwi (ib); + if (TREE_CODE_CLASS (operation) == tcc_unary) + item.value.pass_through.operand = NULL_TREE; + else + item.value.pass_through.operand = stream_read_tree (ib, data_in); + if (item.jftype == IPA_JF_LOAD_AGG) + { + struct bitpack_d bp; + item.value.load_agg.type = stream_read_tree (ib, data_in); + item.value.load_agg.offset = streamer_read_uhwi (ib); + bp = streamer_read_bitpack (ib); + item.value.load_agg.by_ref = bp_unpack_value (&bp, 1); + } + break; + default: + fatal_error (UNKNOWN_LOCATION, + "invalid jump function in LTO stream"); + } if (prevails) jump_func->agg.items->quick_push (item); } @@ -4255,7 +4738,7 @@ static void ipa_write_indirect_edge_info (struct output_block *ob, struct cgraph_edge *cs) { - struct cgraph_indirect_call_info *ii = cs->indirect_info; + class cgraph_indirect_call_info *ii = cs->indirect_info; struct bitpack_d bp; streamer_write_hwi (ob, ii->param_index); @@ -4284,11 +4767,12 @@ ipa_write_indirect_edge_info (struct out relevant to indirect inlining from IB. */ static void -ipa_read_indirect_edge_info (struct lto_input_block *ib, - struct data_in *data_in, - struct cgraph_edge *cs) +ipa_read_indirect_edge_info (class lto_input_block *ib, + class data_in *data_in, + struct cgraph_edge *cs, + class ipa_node_params *info) { - struct cgraph_indirect_call_info *ii = cs->indirect_info; + class cgraph_indirect_call_info *ii = cs->indirect_info; struct bitpack_d bp; ii->param_index = (int) streamer_read_hwi (ib); @@ -4309,6 +4793,14 @@ ipa_read_indirect_edge_info (struct lto_ ii->otr_type = stream_read_tree (ib, data_in); ii->context.stream_in (ib, data_in); } + if (info && ii->param_index >= 0) + { + if (ii->polymorphic) + ipa_set_param_used_by_polymorphic_call (info, + ii->param_index , true); + ipa_set_param_used_by_indirect_call (info, + ii->param_index, true); + } } /* Stream out NODE info to OB. 
*/ @@ -4318,7 +4810,7 @@ ipa_write_node_info (struct output_block { int node_ref; lto_symtab_encoder_t encoder; - struct ipa_node_params *info = IPA_NODE_REF (node); + class ipa_node_params *info = IPA_NODE_REF (node); int j; struct cgraph_edge *e; struct bitpack_d bp; @@ -4345,7 +4837,13 @@ ipa_write_node_info (struct output_block } for (e = node->callees; e; e = e->next_callee) { - struct ipa_edge_args *args = IPA_EDGE_REF (e); + class ipa_edge_args *args = IPA_EDGE_REF (e); + + if (!args) + { + streamer_write_uhwi (ob, 0); + continue; + } streamer_write_uhwi (ob, ipa_get_cs_argument_count (args) * 2 @@ -4359,16 +4857,20 @@ ipa_write_node_info (struct output_block } for (e = node->indirect_calls; e; e = e->next_callee) { - struct ipa_edge_args *args = IPA_EDGE_REF (e); - - streamer_write_uhwi (ob, - ipa_get_cs_argument_count (args) * 2 - + (args->polymorphic_call_contexts != NULL)); - for (j = 0; j < ipa_get_cs_argument_count (args); j++) + class ipa_edge_args *args = IPA_EDGE_REF (e); + if (!args) + streamer_write_uhwi (ob, 0); + else { - ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); - if (args->polymorphic_call_contexts != NULL) - ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); + streamer_write_uhwi (ob, + ipa_get_cs_argument_count (args) * 2 + + (args->polymorphic_call_contexts != NULL)); + for (j = 0; j < ipa_get_cs_argument_count (args); j++) + { + ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); + if (args->polymorphic_call_contexts != NULL) + ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); + } } ipa_write_indirect_edge_info (ob, e); } @@ -4377,8 +4879,8 @@ ipa_write_node_info (struct output_block /* Stream in edge E from IB. */ static void -ipa_read_edge_info (struct lto_input_block *ib, - struct data_in *data_in, +ipa_read_edge_info (class lto_input_block *ib, + class data_in *data_in, struct cgraph_edge *e, bool prevails) { int count = streamer_read_uhwi (ib); @@ -4389,7 +4891,7 @@ ipa_read_edge_info (struct lto_input_blo return; if (prevails && e->possibly_call_in_translation_unit_p ()) { - struct ipa_edge_args *args = IPA_EDGE_REF (e); + class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (e); vec_safe_grow_cleared (args->jump_functions, count); if (contexts_computed) vec_safe_grow_cleared (args->polymorphic_call_contexts, count); @@ -4411,7 +4913,7 @@ ipa_read_edge_info (struct lto_input_blo data_in, prevails); if (contexts_computed) { - struct ipa_polymorphic_call_context ctx; + class ipa_polymorphic_call_context ctx; ctx.stream_in (ib, data_in); } } @@ -4421,14 +4923,15 @@ ipa_read_edge_info (struct lto_input_blo /* Stream in NODE info from IB. */ static void -ipa_read_node_info (struct lto_input_block *ib, struct cgraph_node *node, - struct data_in *data_in) +ipa_read_node_info (class lto_input_block *ib, struct cgraph_node *node, + class data_in *data_in) { int k; struct cgraph_edge *e; struct bitpack_d bp; bool prevails = node->prevailing_p (); - struct ipa_node_params *info = prevails ? IPA_NODE_REF (node) : NULL; + class ipa_node_params *info = prevails + ? 
IPA_NODE_REF_GET_CREATE (node) : NULL; int param_count = streamer_read_uhwi (ib); if (prevails) @@ -4468,7 +4971,7 @@ ipa_read_node_info (struct lto_input_blo for (e = node->indirect_calls; e; e = e->next_callee) { ipa_read_edge_info (ib, data_in, e, prevails); - ipa_read_indirect_edge_info (ib, data_in, e); + ipa_read_indirect_edge_info (ib, data_in, e, info); } } @@ -4525,7 +5028,7 @@ ipa_prop_read_section (struct lto_file_d const int cfg_offset = sizeof (struct lto_function_header); const int main_offset = cfg_offset + header->cfg_size; const int string_offset = main_offset + header->main_size; - struct data_in *data_in; + class data_in *data_in; unsigned int i; unsigned int count; @@ -4774,7 +5277,7 @@ read_replacements_section (struct lto_fi const int cfg_offset = sizeof (struct lto_function_header); const int main_offset = cfg_offset + header->cfg_size; const int string_offset = main_offset + header->main_size; - struct data_in *data_in; + class data_in *data_in; unsigned int i; unsigned int count; @@ -4888,7 +5391,8 @@ ipcp_modif_dom_walker::before_dom_childr struct ipa_agg_replacement_value *v; gimple *stmt = gsi_stmt (gsi); tree rhs, val, t; - HOST_WIDE_INT offset, size; + HOST_WIDE_INT offset; + poly_int64 size; int index; bool by_ref, vce; @@ -4923,7 +5427,8 @@ ipcp_modif_dom_walker::before_dom_childr break; if (!v || v->by_ref != by_ref - || tree_to_shwi (TYPE_SIZE (TREE_TYPE (v->value))) != size) + || maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (v->value))), + size)) continue; gcc_checking_assert (is_gimple_ip_invariant (v->value)); @@ -5194,4 +5699,12 @@ ipcp_transform_function (struct cgraph_n return TODO_update_ssa_only_virtuals; } + +/* Return true if OTHER describes same agg value. */ +bool +ipa_agg_value::equal_to (const ipa_agg_value &other) +{ + return offset == other.offset + && operand_equal_p (value, other.value, 0); +} #include "gt-ipa-prop.h" diff -Nurp a/gcc/ipa-prop.h b/gcc/ipa-prop.h --- a/gcc/ipa-prop.h 2020-04-30 15:14:04.624000000 +0800 +++ b/gcc/ipa-prop.h 2020-04-30 15:14:56.696000000 +0800 @@ -39,6 +39,15 @@ along with GCC; see the file COPYING3. argument. Unknown - neither of the above. + IPA_JF_LOAD_AGG is a compound pass-through jump function, in which primary + operation on formal parameter is memory dereference that loads a value from + a part of an aggregate, which is represented or pointed to by the formal + parameter. Moreover, an additional unary/binary operation can be applied on + the loaded value, and final result is passed as actual argument of callee + (e.g. *(param_1(D) + 4) op 24 ). It is meant to describe usage of aggregate + parameter or by-reference parameter referenced in argument passing, commonly + found in C++ and Fortran. + IPA_JF_ANCESTOR is a special pass-through jump function, which means that the result is an address of a part of the object pointed to by the formal parameter to which the function refers. It is mainly intended to represent @@ -60,6 +69,7 @@ enum jump_func_type IPA_JF_UNKNOWN = 0, /* newly allocated and zeroed jump functions default */ IPA_JF_CONST, /* represented by field costant */ IPA_JF_PASS_THROUGH, /* represented by field pass_through */ + IPA_JF_LOAD_AGG, /* represented by field load_agg */ IPA_JF_ANCESTOR /* represented by field ancestor */ }; @@ -97,6 +107,26 @@ struct GTY(()) ipa_pass_through_data unsigned agg_preserved : 1; }; +/* Structure holding data required to describe a load-value-from-aggregate + jump function. 
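
   An argument of the shape this structure summarizes, as a hypothetical
   source fragment (invented names, not from the patches):

     extern int callee (int v);

     int
     caller (int *p)
     {
       return callee (*(p + 1) + 24);   // load at byte offset 4 from *p
                                        // (assuming a 4-byte int), then
                                        // PLUS_EXPR 24; by_ref is true
     }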
*/ + +struct GTY(()) ipa_load_agg_data +{ + /* Inherit from pass through jump function, describing unary/binary + operation on the value loaded from aggregate that is represented or + pointed to by the formal parameter, specified by formal_id in this + pass_through jump function data structure. */ + struct ipa_pass_through_data pass_through; + /* Type of the value loaded from the aggregate. */ + tree type; + /* Offset at which the value is located within the aggregate. */ + HOST_WIDE_INT offset; + /* True if loaded by reference (the aggregate is pointed to by the formal + parameter) or false if loaded by value (the aggregate is represented + by the formal parameter). */ + bool by_ref; +}; + /* Structure holding data required to describe an ancestor pass-through jump function. */ @@ -110,38 +140,139 @@ struct GTY(()) ipa_ancestor_jf_data unsigned agg_preserved : 1; }; -/* An element in an aggegate part of a jump function describing a known value - at a given offset. When it is part of a pass-through jump function with - agg_preserved set or an ancestor jump function with agg_preserved set, all - unlisted positions are assumed to be preserved but the value can be a type - node, which means that the particular piece (starting at offset and having - the size of the type) is clobbered with an unknown value. When - agg_preserved is false or the type of the containing jump function is - different, all unlisted parts are assumed to be unknown and all values must - fulfill is_gimple_ip_invariant. */ +/* A jump function for an aggregate part at a given offset, which describes how + it content value is generated. All unlisted positions are assumed to have a + value defined in an unknown way. */ struct GTY(()) ipa_agg_jf_item { - /* The offset at which the known value is located within the aggregate. */ + /* The offset for the aggregate part. */ HOST_WIDE_INT offset; - /* The known constant or type if this is a clobber. */ - tree value; -}; + /* Data type of the aggregate part. */ + tree type; + /* Jump function type. */ + enum jump_func_type jftype; -/* Aggregate jump function - i.e. description of contents of aggregates passed - either by reference or value. */ + /* Represents a value of jump function. constant represents the actual constant + in constant jump function content. pass_through is used only in simple pass + through jump function context. load_agg is for load-value-from-aggregate + jump function context. */ + union jump_func_agg_value + { + tree GTY ((tag ("IPA_JF_CONST"))) constant; + struct ipa_pass_through_data GTY ((tag ("IPA_JF_PASS_THROUGH"))) pass_through; + struct ipa_load_agg_data GTY ((tag ("IPA_JF_LOAD_AGG"))) load_agg; + } GTY ((desc ("%1.jftype"))) value; +}; + +/* Jump functions describing a set of aggregate contents. */ struct GTY(()) ipa_agg_jump_function { - /* Description of the individual items. */ + /* Description of the individual jump function item. */ vec *items; - /* True if the data was passed by reference (as opposed to by value). */ + /* True if the data was passed by reference (as opposed to by value). */ + bool by_ref; +}; + +/* An element in an aggregate part describing a known value at a given offset. + All unlisted positions are assumed to be unknown and all listed values must + fulfill is_gimple_ip_invariant. */ + +struct ipa_agg_value +{ + /* The offset at which the known value is located within the aggregate. */ + HOST_WIDE_INT offset; + + /* The known constant. */ + tree value; + + /* Return true if OTHER describes same agg value. 
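
     Equality here means the same offset and, via operand_equal_p, the same
     constant value.  Note that at the jump-function level the items
     described by ipa_agg_jf_item above are no longer restricted to
     constants; e.g. in this hypothetical fragment (invented names, not
     from the patches):

       extern int sink (void *p);

       int
       source (int a)
       {
         struct { int c0; int c1; } s;
         s.c0 = 7;        // item at offset 0: IPA_JF_CONST (7)
         s.c1 = a;        // item at offset 32 (assuming 32-bit int):
                          // IPA_JF_PASS_THROUGH of parameter 0
         return sink (&s);
       }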
*/ + bool equal_to (const ipa_agg_value &other); +}; + +/* Structure describing a set of known offset/value for aggregate. */ + +struct ipa_agg_value_set +{ + /* Description of the individual item. */ + vec items; + /* True if the data was passed by reference (as opposed to by value). */ bool by_ref; + + /* Return true if OTHER describes same agg values. */ + bool equal_to (const ipa_agg_value_set &other) + { + if (by_ref != other.by_ref) + return false; + if (items.length () != other.items.length ()) + return false; + for (unsigned int i = 0; i < items.length (); i++) + if (!items[i].equal_to (other.items[i])) + return false; + return true; + } + + /* Return true if there is any value for aggregate. */ + bool is_empty () const + { + return items.is_empty (); + } + + ipa_agg_value_set copy () const + { + ipa_agg_value_set new_copy; + + new_copy.items = items.copy (); + new_copy.by_ref = by_ref; + + return new_copy; + } + + void release () + { + items.release (); + } }; -typedef struct ipa_agg_jump_function *ipa_agg_jump_function_p; +/* Return copy of a vec. */ + +static inline vec +ipa_copy_agg_values (const vec &aggs) +{ + vec aggs_copy = vNULL; + + if (!aggs.is_empty ()) + { + ipa_agg_value_set *agg; + int i; + + aggs_copy.reserve_exact (aggs.length ()); + + FOR_EACH_VEC_ELT (aggs, i, agg) + aggs_copy.quick_push (agg->copy ()); + } + + return aggs_copy; +} + +/* For vec, DO NOT call release(), use below function + instead. Because ipa_agg_value_set contains a field of vector type, we + should release this child vector in each element before reclaiming the + whole vector. */ + +static inline void +ipa_release_agg_values (vec &aggs) +{ + ipa_agg_value_set *agg; + int i; + + FOR_EACH_VEC_ELT (aggs, i, agg) + agg->release (); + aggs.release (); +} /* Information about zero/non-zero bits. */ struct GTY(()) ipa_bits @@ -170,19 +301,19 @@ struct GTY(()) ipa_vr types of jump functions supported. */ struct GTY (()) ipa_jump_func { - /* Aggregate contants description. See struct ipa_agg_jump_function and its - description. */ + /* Aggregate jump function description. See struct ipa_agg_jump_function + and its description. */ struct ipa_agg_jump_function agg; /* Information about zero/non-zero bits. The pointed to structure is shared betweed different jump functions. Use ipa_set_jfunc_bits to set this field. */ - struct ipa_bits *bits; + class ipa_bits *bits; /* Information about value range, containing valid data only when vr_known is true. The pointed to structure is shared betweed different jump functions. Use ipa_set_jfunc_vr to set this field. */ - struct value_range_base *m_vr; + class value_range_base *m_vr; enum jump_func_type type; /* Represents a value of a jump function. pass_through is used only in jump @@ -310,9 +441,12 @@ struct GTY(()) ipa_param_descriptor says how many there are. If any use could not be described by means of ipa-prop structures, this is IPA_UNDESCRIBED_USE. */ int controlled_uses; - unsigned int move_cost : 31; + unsigned int move_cost : 28; /* The parameter is used. */ unsigned used : 1; + unsigned used_by_ipa_predicates : 1; + unsigned used_by_indirect_call : 1; + unsigned used_by_polymorphic_call : 1; }; /* ipa_node_params stores information related to formal parameters of functions @@ -332,7 +466,7 @@ struct GTY((for_user)) ipa_node_params vec *descriptors; /* Pointer to an array of structures describing individual formal parameters. 
*/ - struct ipcp_param_lattices * GTY((skip)) lattices; + class ipcp_param_lattices * GTY((skip)) lattices; /* Only for versioned nodes this field would not be NULL, it points to the node that IPA cp cloned from. */ struct cgraph_node * GTY((skip)) ipcp_orig_node; @@ -357,6 +491,8 @@ struct GTY((for_user)) ipa_node_params unsigned node_dead : 1; /* Node is involved in a recursion, potentionally indirect. */ unsigned node_within_scc : 1; + /* Node contains only direct recursion. */ + unsigned node_is_self_scc : 1; /* Node is calling a private function called only once. */ unsigned node_calling_single_call : 1; /* False when there is something makes versioning impossible. */ @@ -420,7 +556,7 @@ struct ipa_func_body_info cgraph_node *node; /* Its info. */ - struct ipa_node_params *info; + class ipa_node_params *info; /* Information about individual BBs. */ vec bb_infos; @@ -439,7 +575,7 @@ struct ipa_func_body_info /* Return the number of formal parameters. */ static inline int -ipa_get_param_count (struct ipa_node_params *info) +ipa_get_param_count (class ipa_node_params *info) { return vec_safe_length (info->descriptors); } @@ -450,10 +586,9 @@ ipa_get_param_count (struct ipa_node_par WPA. */ static inline tree -ipa_get_param (struct ipa_node_params *info, int i) +ipa_get_param (class ipa_node_params *info, int i) { gcc_checking_assert (info->descriptors); - gcc_checking_assert (!flag_wpa); tree t = (*info->descriptors)[i].decl_or_type; gcc_checking_assert (TREE_CODE (t) == PARM_DECL); return t; @@ -463,7 +598,7 @@ ipa_get_param (struct ipa_node_params *i to INFO if it is known or NULL if not. */ static inline tree -ipa_get_type (struct ipa_node_params *info, int i) +ipa_get_type (class ipa_node_params *info, int i) { if (vec_safe_length (info->descriptors) <= (unsigned) i) return NULL; @@ -480,7 +615,7 @@ ipa_get_type (struct ipa_node_params *in to INFO. */ static inline int -ipa_get_param_move_cost (struct ipa_node_params *info, int i) +ipa_get_param_move_cost (class ipa_node_params *info, int i) { gcc_checking_assert (info->descriptors); return (*info->descriptors)[i].move_cost; @@ -490,17 +625,47 @@ ipa_get_param_move_cost (struct ipa_node associated with INFO to VAL. */ static inline void -ipa_set_param_used (struct ipa_node_params *info, int i, bool val) +ipa_set_param_used (class ipa_node_params *info, int i, bool val) { gcc_checking_assert (info->descriptors); (*info->descriptors)[i].used = val; } +/* Set the used_by_ipa_predicates flag corresponding to the Ith formal + parameter of the function associated with INFO to VAL. */ + +static inline void +ipa_set_param_used_by_ipa_predicates (class ipa_node_params *info, int i, bool val) +{ + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].used_by_ipa_predicates = val; +} + +/* Set the used_by_indirect_call flag corresponding to the Ith formal + parameter of the function associated with INFO to VAL. */ + +static inline void +ipa_set_param_used_by_indirect_call (class ipa_node_params *info, int i, bool val) +{ + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].used_by_indirect_call = val; +} + +/* Set the .used_by_polymorphic_call flag corresponding to the Ith formal + parameter of the function associated with INFO to VAL. 
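
   A polymorphic use of a parameter, as a hypothetical C++ fragment
   (invented names, not from the patches):

     struct V { virtual int hook (int v); };

     int
     dispatch (V *v, int x)
     {
       return v->hook (x);   // OBJ_TYPE_REF call through parameter 0;
                             // used_by_indirect_call and
                             // used_by_polymorphic_call are both set
     }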
*/ + +static inline void +ipa_set_param_used_by_polymorphic_call (class ipa_node_params *info, int i, bool val) +{ + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].used_by_polymorphic_call = val; +} + /* Return how many uses described by ipa-prop a parameter has or IPA_UNDESCRIBED_USE if there is a use that is not described by these structures. */ static inline int -ipa_get_controlled_uses (struct ipa_node_params *info, int i) +ipa_get_controlled_uses (class ipa_node_params *info, int i) { /* FIXME: introducing speculation causes out of bounds access here. */ if (vec_safe_length (info->descriptors) > (unsigned)i) @@ -511,7 +676,7 @@ ipa_get_controlled_uses (struct ipa_node /* Set the controlled counter of a given parameter. */ static inline void -ipa_set_controlled_uses (struct ipa_node_params *info, int i, int val) +ipa_set_controlled_uses (class ipa_node_params *info, int i, int val) { gcc_checking_assert (info->descriptors); (*info->descriptors)[i].controlled_uses = val; @@ -521,12 +686,42 @@ ipa_set_controlled_uses (struct ipa_node function associated with INFO. */ static inline bool -ipa_is_param_used (struct ipa_node_params *info, int i) +ipa_is_param_used (class ipa_node_params *info, int i) { gcc_checking_assert (info->descriptors); return (*info->descriptors)[i].used; } +/* Return the used_by_ipa_predicates flag corresponding to the Ith formal + parameter of the function associated with INFO. */ + +static inline bool +ipa_is_param_used_by_ipa_predicates (class ipa_node_params *info, int i) +{ + gcc_checking_assert (info->descriptors); + return (*info->descriptors)[i].used_by_ipa_predicates; +} + +/* Return the used_by_indirect_call flag corresponding to the Ith formal + parameter of the function associated with INFO. */ + +static inline bool +ipa_is_param_used_by_indirect_call (class ipa_node_params *info, int i) +{ + gcc_checking_assert (info->descriptors); + return (*info->descriptors)[i].used_by_indirect_call; +} + +/* Return the used_by_polymorphic_call flag corresponding to the Ith formal + parameter of the function associated with INFO. */ + +static inline bool +ipa_is_param_used_by_polymorphic_call (class ipa_node_params *info, int i) +{ + gcc_checking_assert (info->descriptors); + return (*info->descriptors)[i].used_by_polymorphic_call; +} + /* Information about replacements done in aggregates for a given node (each node has its linked list). */ struct GTY(()) ipa_agg_replacement_value @@ -590,7 +785,7 @@ class GTY((for_user)) ipa_edge_args /* Return the number of actual arguments. */ static inline int -ipa_get_cs_argument_count (struct ipa_edge_args *args) +ipa_get_cs_argument_count (class ipa_edge_args *args) { return vec_safe_length (args->jump_functions); } @@ -600,15 +795,15 @@ ipa_get_cs_argument_count (struct ipa_ed ipa_compute_jump_functions. */ static inline struct ipa_jump_func * -ipa_get_ith_jump_func (struct ipa_edge_args *args, int i) +ipa_get_ith_jump_func (class ipa_edge_args *args, int i) { return &(*args->jump_functions)[i]; } /* Returns a pointer to the polymorphic call context for the ith argument. NULL if contexts are not computed. 
*/ -static inline struct ipa_polymorphic_call_context * -ipa_get_ith_polymorhic_call_context (struct ipa_edge_args *args, int i) +static inline class ipa_polymorphic_call_context * +ipa_get_ith_polymorhic_call_context (class ipa_edge_args *args, int i) { if (!args->polymorphic_call_contexts) return NULL; @@ -637,7 +832,12 @@ class GTY((user)) ipa_edge_args_sum_t : ipa_edge_args_sum_t (symbol_table *table, bool ggc) : call_summary (table, ggc) { } - /* Hook that is called by summary when an edge is duplicated. */ + void remove (cgraph_edge *edge) + { + call_summary ::remove (edge); + } + + /* Hook that is called by summary when an edge is removed. */ virtual void remove (cgraph_edge *cs, ipa_edge_args *args); /* Hook that is called by summary when an edge is duplicated. */ virtual void duplicate (cgraph_edge *src, @@ -675,8 +875,10 @@ extern GTY(()) function_summary get_create (NODE)) -#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get_create (EDGE)) +#define IPA_NODE_REF(NODE) (ipa_node_params_sum->get (NODE)) +#define IPA_NODE_REF_GET_CREATE(NODE) (ipa_node_params_sum->get_create (NODE)) +#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get (EDGE)) +#define IPA_EDGE_REF_GET_CREATE(EDGE) (ipa_edge_args_sum->get_create (EDGE)) /* This macro checks validity of index returned by ipa_get_param_decl_index function. */ #define IS_VALID_JUMP_FUNC_INDEX(I) ((I) != -1) @@ -740,9 +942,9 @@ bool ipa_propagate_indirect_call_infos ( /* Indirect edge and binfo processing. */ tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, - vec , + vec, vec, - vec, + vec, bool *); struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, tree, bool speculative = false); @@ -755,13 +957,13 @@ ipa_bits *ipa_get_ipa_bits_for_value (co void ipa_analyze_node (struct cgraph_node *); /* Aggregate jump function related functions. */ -tree ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, +tree ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, HOST_WIDE_INT offset, bool by_ref, bool *from_global_constant = NULL); bool ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, vec *descriptors, gimple *stmt, tree op, int *index_p, - HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, + HOST_WIDE_INT *offset_p, poly_int64 *size_p, bool *by_ref, bool *guaranteed_unmodified = NULL); /* Debugging interface. 
*/ @@ -779,11 +981,11 @@ extern object_allocator -class ipcp_value_source; +struct ipcp_value_source; extern object_allocator > ipcp_sources_pool; -class ipcp_agg_lattice; +struct ipcp_agg_lattice; extern object_allocator ipcp_agg_lattice_pool; @@ -793,15 +995,18 @@ void ipa_prop_write_jump_functions (void void ipa_prop_read_jump_functions (void); void ipcp_write_transformation_summaries (void); void ipcp_read_transformation_summaries (void); -int ipa_get_param_decl_index (struct ipa_node_params *, tree); -tree ipa_value_from_jfunc (struct ipa_node_params *info, +int ipa_get_param_decl_index (class ipa_node_params *, tree); +tree ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc, tree type); unsigned int ipcp_transform_function (struct cgraph_node *node); ipa_polymorphic_call_context ipa_context_from_jfunc (ipa_node_params *, cgraph_edge *, int, ipa_jump_func *); -void ipa_dump_param (FILE *, struct ipa_node_params *info, int i); +ipa_agg_value_set ipa_agg_value_set_from_jfunc (ipa_node_params *, + cgraph_node *, + ipa_agg_jump_function *); +void ipa_dump_param (FILE *, class ipa_node_params *info, int i); void ipa_release_body_info (struct ipa_func_body_info *); tree ipa_get_callee_param_type (struct cgraph_edge *e, int i); diff -Nurp a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c --- a/gcc/ipa-pure-const.c 2020-04-30 15:14:04.600000000 +0800 +++ b/gcc/ipa-pure-const.c 2020-04-30 15:14:56.588000000 +0800 @@ -1360,12 +1360,14 @@ ignore_edge_for_nothrow (struct cgraph_e return true; enum availability avail; - cgraph_node *n = e->callee->function_or_virtual_thunk_symbol (&avail, - e->caller); - if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (n->decl)) + cgraph_node *ultimate_target + = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); + if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (ultimate_target->decl)) return true; - return opt_for_fn (e->callee->decl, flag_non_call_exceptions) - && !e->callee->binds_to_current_def_p (e->caller); + return ((opt_for_fn (e->callee->decl, flag_non_call_exceptions) + && !e->callee->binds_to_current_def_p (e->caller)) + || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) + || !opt_for_fn (ultimate_target->decl, flag_ipa_pure_const)); } /* Return true if NODE is self recursive function. @@ -1395,16 +1397,21 @@ cdtor_p (cgraph_node *n, void *) return false; } -/* We only propagate across edges with non-interposable callee. */ +/* Skip edges from and to nodes without ipa_pure_const enabled. + Ignore not available symbols. */ static bool ignore_edge_for_pure_const (struct cgraph_edge *e) { enum availability avail; - e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); - return (avail <= AVAIL_INTERPOSABLE); -} + cgraph_node *ultimate_target + = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); + return (avail <= AVAIL_INTERPOSABLE + || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) + || !opt_for_fn (ultimate_target->decl, + flag_ipa_pure_const)); +} /* Produce transitive closure over the callgraph and compute pure/const attributes. */ @@ -1670,7 +1677,7 @@ propagate_pure_const (void) /* Inline clones share declaration with their offline copies; do not modify their declarations since the offline copy may be different. 
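
   As the ignore_edge_for_pure_const change above shows, propagation now
   also honors per-function settings.  In this hypothetical fragment
   (invented names, assuming the optimize attribute spelling shown maps to
   -fno-ipa-pure-const), the edge into 'marked' is skipped because the
   pass is disabled for it:

     __attribute__ ((optimize ("no-ipa-pure-const")))
     static int marked (int x) { return x; }

     int
     wrapper (int x)
     {
       return marked (x);   // edge ignored during pure/const propagation
     }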
*/ - if (!w->global.inlined_to) + if (!w->inlined_to) switch (this_state) { case IPA_CONST: @@ -1831,7 +1838,7 @@ propagate_nothrow (void) /* Inline clones share declaration with their offline copies; do not modify their declarations since the offline copy may be different. */ - if (!w->global.inlined_to) + if (!w->inlined_to) { w->set_nothrow_flag (true); if (dump_file) @@ -1958,7 +1965,7 @@ propagate_malloc (void) funct_state l = funct_state_summaries->get (node); if (!node->alias && l->malloc_state == STATE_MALLOC - && !node->global.inlined_to) + && !node->inlined_to) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Function %s found to be malloc\n", diff -Nurp a/gcc/ipa-reference.c b/gcc/ipa-reference.c --- a/gcc/ipa-reference.c 2020-04-30 15:14:04.644000000 +0800 +++ b/gcc/ipa-reference.c 2020-04-30 15:14:56.588000000 +0800 @@ -46,7 +46,6 @@ along with GCC; see the file COPYING3. #include "cgraph.h" #include "data-streamer.h" #include "calls.h" -#include "splay-tree.h" #include "ipa-utils.h" #include "ipa-reference.h" #include "symbol-summary.h" @@ -75,8 +74,8 @@ struct ipa_reference_global_vars_info_d struct ipa_reference_optimization_summary_d { - bitmap statics_not_read; - bitmap statics_not_written; + bitmap statics_read; + bitmap statics_written; }; typedef ipa_reference_local_vars_info_d *ipa_reference_local_vars_info_t; @@ -92,14 +91,20 @@ struct ipa_reference_vars_info_d typedef struct ipa_reference_vars_info_d *ipa_reference_vars_info_t; -/* This splay tree contains all of the static variables that are +/* This map contains all of the static variables that are being considered by the compilation level alias analysis. */ -static splay_tree reference_vars_to_consider; +typedef hash_map reference_vars_map_t; +static reference_vars_map_t *ipa_reference_vars_map; +static int ipa_reference_vars_uids; +static vec *reference_vars_to_consider; +varpool_node_hook_list *varpool_node_hooks; /* Set of all interesting module statics. A bit is set for every module static we are considering. This is added to the local info when asm code is found that clobbers all memory. */ static bitmap all_module_statics; +/* Zero bitmap. */ +static bitmap no_module_statics; /* Set of all statics that should be ignored because they are touched by -fno-ipa-reference code. */ static bitmap ignore_module_statics; @@ -136,6 +141,31 @@ public: static ipa_ref_opt_summary_t *ipa_ref_opt_sum_summaries = NULL; +/* Return ID used by ipa-reference bitmaps. -1 if failed. */ +int +ipa_reference_var_uid (tree t) +{ + if (!ipa_reference_vars_map) + return -1; + int *id = ipa_reference_vars_map->get + (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); + if (!id) + return -1; + return *id; +} + +/* Return ID used by ipa-reference bitmaps. Create new entry if + T is not in map. Set EXISTED accordinly */ +int +ipa_reference_var_get_or_insert_uid (tree t, bool *existed) +{ + int &id = ipa_reference_vars_map->get_or_insert + (symtab_node::get (t)->ultimate_alias_target (NULL)->decl, existed); + if (!*existed) + id = ipa_reference_vars_uids++; + return id; +} + /* Return the ipa_reference_vars structure starting from the cgraph NODE. */ static inline ipa_reference_vars_info_t get_reference_vars_info (struct cgraph_node *node) @@ -165,7 +195,7 @@ get_reference_optimization_summary (stru NULL if no data is available. 
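
   The optimization summary now records the statics actually read and
   written rather than their complements; e.g. in this hypothetical
   translation unit (invented names, not from the patches):

     static int counter;

     int
     peek (void)
     {
       return counter;   // statics_read = { counter }
     }

     void
     bump (void)
     {
       counter += 1;     // statics_written = { counter }; the load also
                         // puts it in statics_read
     }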
*/ bitmap -ipa_reference_get_not_read_global (struct cgraph_node *fn) +ipa_reference_get_read_global (struct cgraph_node *fn) { if (!opt_for_fn (current_function_decl, flag_ipa_reference)) return NULL; @@ -180,10 +210,10 @@ ipa_reference_get_not_read_global (struc || (avail == AVAIL_INTERPOSABLE && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) && opt_for_fn (fn2->decl, flag_ipa_reference)) - return info->statics_not_read; + return info->statics_read; else if (avail == AVAIL_NOT_AVAILABLE && flags_from_decl_or_type (fn->decl) & ECF_LEAF) - return all_module_statics; + return no_module_statics; else return NULL; } @@ -194,7 +224,7 @@ ipa_reference_get_not_read_global (struc call. Returns NULL if no data is available. */ bitmap -ipa_reference_get_not_written_global (struct cgraph_node *fn) +ipa_reference_get_written_global (struct cgraph_node *fn) { if (!opt_for_fn (current_function_decl, flag_ipa_reference)) return NULL; @@ -209,10 +239,10 @@ ipa_reference_get_not_written_global (st || (avail == AVAIL_INTERPOSABLE && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) && opt_for_fn (fn2->decl, flag_ipa_reference)) - return info->statics_not_written; + return info->statics_written; else if (avail == AVAIL_NOT_AVAILABLE && flags_from_decl_or_type (fn->decl) & ECF_LEAF) - return all_module_statics; + return no_module_statics; else return NULL; } @@ -256,7 +286,9 @@ is_improper (symtab_node *n, void *v ATT static inline bool is_proper_for_analysis (tree t) { - if (bitmap_bit_p (ignore_module_statics, ipa_reference_var_uid (t))) + int id = ipa_reference_var_uid (t); + + if (id != -1 && bitmap_bit_p (ignore_module_statics, id)) return false; if (symtab_node::get (t) @@ -272,9 +304,7 @@ is_proper_for_analysis (tree t) static const char * get_static_name (int index) { - splay_tree_node stn = - splay_tree_lookup (reference_vars_to_consider, index); - return fndecl_name ((tree)(stn->value)); + return fndecl_name ((*reference_vars_to_consider)[index]); } /* Dump a set of static vars to FILE. */ @@ -287,6 +317,8 @@ dump_static_vars_set_to_file (FILE *f, b return; else if (set == all_module_statics) fprintf (f, "ALL"); + else if (set == no_module_statics) + fprintf (f, "NO"); else EXECUTE_IF_SET_IN_BITMAP (set, 0, index, bi) { @@ -330,10 +362,12 @@ union_static_var_sets (bitmap &x, bitmap But if SET is NULL or the maximum set, return that instead. */ static bitmap -copy_static_var_set (bitmap set) +copy_static_var_set (bitmap set, bool for_propagation) { if (set == NULL || set == all_module_statics) return set; + if (!for_propagation && set == no_module_statics) + return set; bitmap_obstack *o = set->obstack; gcc_checking_assert (o); bitmap copy = BITMAP_ALLOC (o); @@ -403,6 +437,14 @@ propagate_bits (ipa_reference_global_var } } +/* Delete NODE from map. */ + +static void +varpool_removal_hook (varpool_node *node, void *) +{ + ipa_reference_vars_map->remove (node->decl); +} + static bool ipa_init_p = false; /* The init routine for analyzing global static variable usage. 
See @@ -415,22 +457,28 @@ ipa_init (void) ipa_init_p = true; - if (dump_file) - reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); + vec_alloc (reference_vars_to_consider, 10); + + + if (ipa_ref_opt_sum_summaries != NULL) + { + delete ipa_ref_opt_sum_summaries; + ipa_ref_opt_sum_summaries = NULL; + delete ipa_reference_vars_map; + } + ipa_reference_vars_map = new reference_vars_map_t(257); + varpool_node_hooks + = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); + ipa_reference_vars_uids = 0; bitmap_obstack_initialize (&local_info_obstack); bitmap_obstack_initialize (&optimization_summary_obstack); all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); + no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); ignore_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); if (ipa_ref_var_info_summaries == NULL) ipa_ref_var_info_summaries = new ipa_ref_var_info_summary_t (symtab); - - if (ipa_ref_opt_sum_summaries != NULL) - { - delete ipa_ref_opt_sum_summaries; - ipa_ref_opt_sum_summaries = NULL; - } } @@ -465,6 +513,8 @@ analyze_function (struct cgraph_node *fn local = init_function_info (fn); for (i = 0; fn->iterate_reference (i, ref); i++) { + int id; + bool existed; if (!is_a (ref->referred)) continue; var = ref->referred->decl; @@ -472,23 +522,22 @@ analyze_function (struct cgraph_node *fn continue; /* This is a variable we care about. Check if we have seen it before, and if not add it the set of variables we care about. */ - if (all_module_statics - && bitmap_set_bit (all_module_statics, ipa_reference_var_uid (var))) + id = ipa_reference_var_get_or_insert_uid (var, &existed); + if (!existed) { + bitmap_set_bit (all_module_statics, id); if (dump_file) - splay_tree_insert (reference_vars_to_consider, - ipa_reference_var_uid (var), - (splay_tree_value)var); + reference_vars_to_consider->safe_push (var); } switch (ref->use) { case IPA_REF_LOAD: - bitmap_set_bit (local->statics_read, ipa_reference_var_uid (var)); + bitmap_set_bit (local->statics_read, id); break; case IPA_REF_STORE: if (ref->cannot_lead_to_return ()) break; - bitmap_set_bit (local->statics_written, ipa_reference_var_uid (var)); + bitmap_set_bit (local->statics_written, id); break; case IPA_REF_ADDR: break; @@ -510,10 +559,10 @@ ipa_ref_opt_summary_t::duplicate (cgraph ipa_reference_optimization_summary_d *dst_ginfo) { - dst_ginfo->statics_not_read = - copy_static_var_set (ginfo->statics_not_read); - dst_ginfo->statics_not_written = - copy_static_var_set (ginfo->statics_not_written); + dst_ginfo->statics_read = + copy_static_var_set (ginfo->statics_read, false); + dst_ginfo->statics_written = + copy_static_var_set (ginfo->statics_written, false); } /* Called when node is removed. 
*/ @@ -522,13 +571,15 @@ void ipa_ref_opt_summary_t::remove (cgraph_node *, ipa_reference_optimization_summary_d *ginfo) { - if (ginfo->statics_not_read - && ginfo->statics_not_read != all_module_statics) - BITMAP_FREE (ginfo->statics_not_read); - - if (ginfo->statics_not_written - && ginfo->statics_not_written != all_module_statics) - BITMAP_FREE (ginfo->statics_not_written); + if (ginfo->statics_read + && ginfo->statics_read != all_module_statics + && ginfo->statics_read != no_module_statics) + BITMAP_FREE (ginfo->statics_read); + + if (ginfo->statics_written + && ginfo->statics_written != all_module_statics + && ginfo->statics_written != no_module_statics) + BITMAP_FREE (ginfo->statics_written); } /* Analyze each function in the cgraph to see which global or statics @@ -676,16 +727,23 @@ get_read_write_all_from_node (struct cgr } } -/* Skip edges from and to nodes without ipa_reference enables. This leave - them out of strongy connected coponents and makes them easyto skip in the +/* Skip edges from and to nodes without ipa_reference enabled. + Ignore not available symbols. This leave + them out of strongly connected components and makes them easy to skip in the propagation loop bellow. */ static bool ignore_edge_p (cgraph_edge *e) { - return (!opt_for_fn (e->caller->decl, flag_ipa_reference) - || !opt_for_fn (e->callee->function_symbol ()->decl, - flag_ipa_reference)); + enum availability avail; + cgraph_node *ultimate_target + = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); + + return (avail < AVAIL_INTERPOSABLE + || (avail == AVAIL_INTERPOSABLE + && !(flags_from_decl_or_type (e->callee->decl) & ECF_LEAF)) + || !opt_for_fn (e->caller->decl, flag_ipa_reference) + || !opt_for_fn (ultimate_target->decl, flag_ipa_reference)); } /* Produce the global information by preforming a transitive closure @@ -753,11 +811,12 @@ propagate (void) if (read_all) node_g->statics_read = all_module_statics; else - node_g->statics_read = copy_static_var_set (node_l->statics_read); + node_g->statics_read = copy_static_var_set (node_l->statics_read, true); if (write_all) node_g->statics_written = all_module_statics; else - node_g->statics_written = copy_static_var_set (node_l->statics_written); + node_g->statics_written + = copy_static_var_set (node_l->statics_written, true); /* Merge the sets of this cycle with all sets of callees reached from this cycle. */ @@ -841,12 +900,26 @@ propagate (void) ipa_reference_vars_info_t node_info; ipa_reference_global_vars_info_t node_g; + /* No need to produce summaries for inline clones. */ + if (node->inlined_to) + continue; + node_info = get_reference_vars_info (node); - if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference) - && (node->get_availability () > AVAIL_INTERPOSABLE - || (flags_from_decl_or_type (node->decl) & ECF_LEAF))) + if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference)) { node_g = &node_info->global; + bool read_all = + (node_g->statics_read == all_module_statics + || bitmap_equal_p (node_g->statics_read, all_module_statics)); + bool written_all = + (node_g->statics_written == all_module_statics + || bitmap_equal_p (node_g->statics_written, + all_module_statics)); + + /* There is no need to produce summary if we collected nothing + useful. */ + if (read_all && written_all) + continue; ipa_reference_optimization_summary_d *opt = ipa_ref_opt_sum_summaries->get_create (node); @@ -854,27 +927,25 @@ propagate (void) /* Create the complimentary sets. 
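The reworked ignore_edge_p above filters edges by the availability of the ultimate call target before the strongly connected components are built: targets that are not even interposable, or interposable but not ECF_LEAF, contribute nothing reliable to the propagation. The shape of the predicate, reduced to plain data (a sketch with a simplified availability lattice, not GCC code):

  /* Mirrors the relevant part of GCC's availability ordering.  */
  enum availability { AVAIL_NOT_AVAILABLE, AVAIL_INTERPOSABLE, AVAIL_AVAILABLE };

  struct edge_facts
  {
    availability callee_avail;  /* of the alias/thunk-resolved target */
    bool callee_is_leaf;        /* ECF_LEAF on the callee decl */
    bool caller_has_ipa_ref;    /* flag_ipa_reference on the caller */
    bool callee_has_ipa_ref;    /* flag_ipa_reference on the target */
  };

  static bool
  ignore_edge (const edge_facts &e)
  {
    return e.callee_avail < AVAIL_INTERPOSABLE
           || (e.callee_avail == AVAIL_INTERPOSABLE && !e.callee_is_leaf)
           || !e.caller_has_ipa_ref
           || !e.callee_has_ipa_ref;
  }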
*/ if (bitmap_empty_p (node_g->statics_read)) - opt->statics_not_read = all_module_statics; + opt->statics_read = no_module_statics; + else if (read_all) + opt->statics_read = all_module_statics; else { - opt->statics_not_read + opt->statics_read = BITMAP_ALLOC (&optimization_summary_obstack); - if (node_g->statics_read != all_module_statics) - bitmap_and_compl (opt->statics_not_read, - all_module_statics, - node_g->statics_read); + bitmap_copy (opt->statics_read, node_g->statics_read); } if (bitmap_empty_p (node_g->statics_written)) - opt->statics_not_written = all_module_statics; + opt->statics_written = no_module_statics; + else if (written_all) + opt->statics_written = all_module_statics; else { - opt->statics_not_written + opt->statics_written = BITMAP_ALLOC (&optimization_summary_obstack); - if (node_g->statics_written != all_module_statics) - bitmap_and_compl (opt->statics_not_written, - all_module_statics, - node_g->statics_written); + bitmap_copy (opt->statics_written, node_g->statics_written); } } } @@ -892,7 +963,7 @@ propagate (void) ipa_ref_var_info_summaries = NULL; if (dump_file) - splay_tree_delete (reference_vars_to_consider); + vec_free (reference_vars_to_consider); reference_vars_to_consider = NULL; return remove_p ? TODO_remove_functions : 0; } @@ -907,12 +978,10 @@ write_node_summary_p (struct cgraph_node ipa_reference_optimization_summary_t info; /* See if we have (non-empty) info. */ - if (!node->definition || node->global.inlined_to) + if (!node->definition || node->inlined_to) return false; info = get_reference_optimization_summary (node); - if (!info - || (bitmap_empty_p (info->statics_not_read) - && bitmap_empty_p (info->statics_not_written))) + if (!info) return false; /* See if we want to encode it. @@ -925,11 +994,17 @@ write_node_summary_p (struct cgraph_node && !referenced_from_this_partition_p (node, encoder)) return false; - /* See if the info has non-empty intersections with vars we want to encode. */ - if (!bitmap_intersect_p (info->statics_not_read, ltrans_statics) - && !bitmap_intersect_p (info->statics_not_written, ltrans_statics)) - return false; - return true; + /* See if the info has non-empty intersections with vars we want to + encode. */ + bitmap_iterator bi; + unsigned int i; + EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_read, 0, + i, bi) + return true; + EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_written, 0, + i, bi) + return true; + return false; } /* Stream out BITS&LTRANS_STATICS as list of decls to OB. @@ -962,8 +1037,7 @@ stream_out_bitmap (struct lto_simple_out return; EXECUTE_IF_AND_IN_BITMAP (bits, ltrans_statics, 0, index, bi) { - tree decl = (tree)splay_tree_lookup (reference_vars_to_consider, - index)->value; + tree decl = (*reference_vars_to_consider) [index]; lto_output_var_decl_index (ob->decl_state, ob->main_stream, decl); } } @@ -981,23 +1055,23 @@ ipa_reference_write_optimization_summary auto_bitmap ltrans_statics; int i; - reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); + vec_alloc (reference_vars_to_consider, ipa_reference_vars_uids); + reference_vars_to_consider->safe_grow (ipa_reference_vars_uids); /* See what variables we are interested in.
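With the direct read/written sets, write_node_summary_p above can no longer test bitmap_intersect_p on complements; a summary matters for a partition exactly when some partition static is provably not read or not written, i.e. when ltrans_statics AND-COMPL statics_read (or statics_written) is non-empty, which is what the two EXECUTE_IF_AND_COMPL_IN_BITMAP loops check. The same emptiness test over std::bitset (illustrative only; a fixed universe of tracked statics is assumed):

  #include <bitset>

  static const unsigned N = 1024;  /* hypothetical number of tracked statics */

  static bool
  summary_useful_p (const std::bitset<N> &ltrans_statics,
                    const std::bitset<N> &statics_read,
                    const std::bitset<N> &statics_written)
  {
    /* Partition statics the function provably does not read,
       resp. provably does not write.  */
    return (ltrans_statics & ~statics_read).any ()
           || (ltrans_statics & ~statics_written).any ();
  }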
*/ for (i = 0; i < lto_symtab_encoder_size (encoder); i++) { symtab_node *snode = lto_symtab_encoder_deref (encoder, i); varpool_node *vnode = dyn_cast (snode); + int id; + if (vnode - && bitmap_bit_p (all_module_statics, - ipa_reference_var_uid (vnode->decl)) + && (id = ipa_reference_var_uid (vnode->decl)) != -1 && referenced_from_this_partition_p (vnode, encoder)) { tree decl = vnode->decl; - bitmap_set_bit (ltrans_statics, ipa_reference_var_uid (decl)); - splay_tree_insert (reference_vars_to_consider, - ipa_reference_var_uid (decl), - (splay_tree_value)decl); + bitmap_set_bit (ltrans_statics, id); + (*reference_vars_to_consider)[id] = decl; ltrans_statics_bitcount ++; } } @@ -1032,14 +1106,14 @@ ipa_reference_write_optimization_summary node_ref = lto_symtab_encoder_encode (encoder, snode); streamer_write_uhwi_stream (ob->main_stream, node_ref); - stream_out_bitmap (ob, info->statics_not_read, ltrans_statics, + stream_out_bitmap (ob, info->statics_read, ltrans_statics, ltrans_statics_bitcount); - stream_out_bitmap (ob, info->statics_not_written, ltrans_statics, + stream_out_bitmap (ob, info->statics_written, ltrans_statics, ltrans_statics_bitcount); } } lto_destroy_simple_output_block (ob); - splay_tree_delete (reference_vars_to_consider); + delete reference_vars_to_consider; } /* Deserialize the ipa info for lto. */ @@ -1053,10 +1127,15 @@ ipa_reference_read_optimization_summary unsigned int j = 0; bitmap_obstack_initialize (&optimization_summary_obstack); - if (ipa_ref_opt_sum_summaries == NULL) - ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); + gcc_checking_assert (ipa_ref_opt_sum_summaries == NULL); + ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); + ipa_reference_vars_map = new reference_vars_map_t(257); + varpool_node_hooks + = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); + ipa_reference_vars_uids = 0; all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); + no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); while ((file_data = file_data_vec[j++])) { @@ -1081,8 +1160,11 @@ ipa_reference_read_optimization_summary unsigned int var_index = streamer_read_uhwi (ib); tree v_decl = lto_file_decl_data_get_var_decl (file_data, var_index); + bool existed; bitmap_set_bit (all_module_statics, - ipa_reference_var_uid (v_decl)); + ipa_reference_var_get_or_insert_uid + (v_decl, &existed)); + gcc_checking_assert (!existed); if (dump_file) fprintf (dump_file, " %s", fndecl_name (v_decl)); } @@ -1102,57 +1184,65 @@ ipa_reference_read_optimization_summary ipa_reference_optimization_summary_d *info = ipa_ref_opt_sum_summaries->get_create (node); - info->statics_not_read = BITMAP_ALLOC - (&optimization_summary_obstack); - info->statics_not_written = BITMAP_ALLOC - (&optimization_summary_obstack); if (dump_file) fprintf (dump_file, - "\nFunction name:%s:\n static not read:", + "\nFunction name:%s:\n static read:", node->dump_asm_name ()); - /* Set the statics not read. */ + /* Set the statics read. 
*/ v_count = streamer_read_hwi (ib); if (v_count == -1) { - info->statics_not_read = all_module_statics; + info->statics_read = all_module_statics; if (dump_file) fprintf (dump_file, " all module statics"); } + else if (v_count == 0) + info->statics_read = no_module_statics; else - for (j = 0; j < (unsigned int)v_count; j++) - { - unsigned int var_index = streamer_read_uhwi (ib); - tree v_decl = lto_file_decl_data_get_var_decl (file_data, - var_index); - bitmap_set_bit (info->statics_not_read, - ipa_reference_var_uid (v_decl)); - if (dump_file) - fprintf (dump_file, " %s", fndecl_name (v_decl)); - } + { + info->statics_read = BITMAP_ALLOC + (&optimization_summary_obstack); + for (j = 0; j < (unsigned int)v_count; j++) + { + unsigned int var_index = streamer_read_uhwi (ib); + tree v_decl = lto_file_decl_data_get_var_decl (file_data, + var_index); + bitmap_set_bit (info->statics_read, + ipa_reference_var_uid (v_decl)); + if (dump_file) + fprintf (dump_file, " %s", fndecl_name (v_decl)); + } + } if (dump_file) fprintf (dump_file, - "\n static not written:"); - /* Set the statics not written. */ + "\n static written:"); + /* Set the statics written. */ v_count = streamer_read_hwi (ib); if (v_count == -1) { - info->statics_not_written = all_module_statics; + info->statics_written = all_module_statics; if (dump_file) fprintf (dump_file, " all module statics"); } + else if (v_count == 0) + info->statics_written = no_module_statics; else - for (j = 0; j < (unsigned int)v_count; j++) - { - unsigned int var_index = streamer_read_uhwi (ib); - tree v_decl = lto_file_decl_data_get_var_decl (file_data, - var_index); - bitmap_set_bit (info->statics_not_written, - ipa_reference_var_uid (v_decl)); - if (dump_file) - fprintf (dump_file, " %s", fndecl_name (v_decl)); - } + { + info->statics_written = BITMAP_ALLOC + (&optimization_summary_obstack); + for (j = 0; j < (unsigned int)v_count; j++) + { + unsigned int var_index = streamer_read_uhwi (ib); + tree v_decl = lto_file_decl_data_get_var_decl (file_data, + var_index); + bitmap_set_bit (info->statics_written, + ipa_reference_var_uid (v_decl)); + if (dump_file) + fprintf (dump_file, " %s", fndecl_name (v_decl)); + } + } if (dump_file) fprintf (dump_file, "\n"); } @@ -1233,6 +1323,9 @@ ipa_reference_c_finalize (void) { delete ipa_ref_opt_sum_summaries; ipa_ref_opt_sum_summaries = NULL; + delete ipa_reference_vars_map; + ipa_reference_vars_map = NULL; + symtab->remove_varpool_removal_hook (varpool_node_hooks); } if (ipa_init_p) diff -Nurp a/gcc/ipa-reference.h b/gcc/ipa-reference.h --- a/gcc/ipa-reference.h 2020-04-30 15:14:04.580000000 +0800 +++ b/gcc/ipa-reference.h 2020-04-30 15:14:56.540000000 +0800 @@ -22,15 +22,10 @@ along with GCC; see the file COPYING3. 
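Both decode loops above follow one on-disk convention for a statics set, with the two sentinels folded into the element count. A sketch of the decoder (std::set standing in for bitmaps; not the actual streamer API):

  #include <set>
  #include <vector>

  typedef std::set<int> static_set;
  static static_set all_module_statics_s, no_module_statics_s;

  /* count == -1 selects the shared "all" sentinel, count == 0 the shared
     "none" sentinel; only a positive count allocates a real set.  */
  static static_set *
  read_statics_set (int count, const std::vector<int> &uids)
  {
    if (count == -1)
      return &all_module_statics_s;
    if (count == 0)
      return &no_module_statics_s;
    static_set *s = new static_set;
    for (int i = 0; i < count; i++)
      s->insert (uids[i]);
    return s;
  }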
#define GCC_IPA_REFERENCE_H /* In ipa-reference.c */ -bitmap ipa_reference_get_not_read_global (struct cgraph_node *fn); -bitmap ipa_reference_get_not_written_global (struct cgraph_node *fn); +bitmap ipa_reference_get_read_global (struct cgraph_node *fn); +bitmap ipa_reference_get_written_global (struct cgraph_node *fn); void ipa_reference_c_finalize (void); - -inline int -ipa_reference_var_uid (tree t) -{ - return DECL_UID (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); -} +int ipa_reference_var_uid (tree t); #endif /* GCC_IPA_REFERENCE_H */ diff -Nurp a/gcc/ipa-utils.c b/gcc/ipa-utils.c --- a/gcc/ipa-utils.c 2020-04-30 15:14:04.576000000 +0800 +++ b/gcc/ipa-utils.c 2020-04-30 15:14:56.588000000 +0800 @@ -103,8 +103,7 @@ searchc (struct searchc_env* env, struct continue; if (w->aux - && (avail > AVAIL_INTERPOSABLE - || avail == AVAIL_INTERPOSABLE)) + && (avail >= AVAIL_INTERPOSABLE)) { w_info = (struct ipa_dfs_info *) w->aux; if (w_info->new_node) @@ -297,7 +296,7 @@ ipa_reverse_postorder (struct cgraph_nod if (!node->aux && (pass || (!node->address_taken - && !node->global.inlined_to + && !node->inlined_to && !node->alias && !node->thunk.thunk_p && !node->only_called_directly_p ()))) { diff -Nurp a/gcc/ipa-utils.h b/gcc/ipa-utils.h --- a/gcc/ipa-utils.h 2020-04-30 15:14:04.652000000 +0800 +++ b/gcc/ipa-utils.h 2020-04-30 15:14:56.624000000 +0800 @@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n struct cgraph_node *src, bool preserve_body = false); bool recursive_call_p (tree, tree); +/* In ipa-prop.c */ +void ipa_remove_useless_jump_functions (); + /* In ipa-profile.c */ bool ipa_propagate_frequency (struct cgraph_node *node); @@ -54,6 +57,7 @@ bool ipa_propagate_frequency (struct cgr struct odr_type_d; typedef odr_type_d *odr_type; +extern bool thunk_expansion; void build_type_inheritance_graph (void); void rebuild_type_inheritance_graph (void); void update_type_inheritance_graph (void); @@ -263,5 +267,3 @@ odr_type_p (const_tree t) } #endif /* GCC_IPA_UTILS_H */ - - diff -Nurp a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c --- a/gcc/ipa-visibility.c 2020-04-30 15:14:04.568000000 +0800 +++ b/gcc/ipa-visibility.c 2020-04-30 15:14:56.588000000 +0800 @@ -707,7 +707,7 @@ function_and_variable_visibility (bool w || DECL_EXTERNAL (node->decl)); if (cgraph_externally_visible_p (node, whole_program)) { - gcc_assert (!node->global.inlined_to); + gcc_assert (!node->inlined_to); node->externally_visible = true; } else diff -Nurp a/gcc/lto/lto.c b/gcc/lto/lto.c --- a/gcc/lto/lto.c 2020-04-30 15:14:04.664000000 +0800 +++ b/gcc/lto/lto.c 2020-04-30 15:14:56.552000000 +0800 @@ -3211,9 +3211,9 @@ do_whole_program_analysis (void) else gcc_unreachable (); - /* Inline summaries are needed for balanced partitioning. Free them now so + /* Size summaries are needed for balanced partitioning. Free them now so the memory can be used for streamer caches. */ - ipa_free_fn_summary (); + ipa_free_size_summary (); /* AUX pointers are used by partitioning code to bookkeep number of partitions symbol is in. This is no longer needed. 
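The lto.c change above relies on the summary split introduced elsewhere in this backport: size data moves out of the big per-function summary into the small ipa_size_summaries table, so partitioning can keep only what it consumes after ipa_free_fn_summary. Roughly (field sets abbreviated; not the real class layout):

  /* What balanced partitioning reads (kept alive)...  */
  struct size_summary_s
  {
    int size;          /* used by lto-partition.c for part->insns */
    int self_size;
  };

  /* ...versus the full summary (freed before streaming), which also
     carries time estimates, predicates, call context data and more.  */
  struct fn_summary_s : size_summary_s
  {
    /* int time; predicates; loop bounds; ...  */
  };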
*/ diff -Nurp a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c --- a/gcc/lto/lto-partition.c 2020-04-30 15:14:04.664000000 +0800 +++ b/gcc/lto/lto-partition.c 2020-04-30 15:14:56.592000000 +0800 @@ -171,7 +171,7 @@ add_symbol_to_partition_1 (ltrans_partit { struct cgraph_edge *e; if (!node->alias && c == SYMBOL_PARTITION) - part->insns += ipa_fn_summaries->get (cnode)->size; + part->insns += ipa_size_summaries->get (cnode)->size; /* Add all inline clones and callees that are duplicated. */ for (e = cnode->callees; e; e = e->next_callee) @@ -182,7 +182,7 @@ add_symbol_to_partition_1 (ltrans_partit /* Add all thunks associated with the function. */ for (e = cnode->callers; e; e = e->next_caller) - if (e->caller->thunk.thunk_p && !e->caller->global.inlined_to) + if (e->caller->thunk.thunk_p && !e->caller->inlined_to) add_symbol_to_partition_1 (part, e->caller); } @@ -233,8 +233,8 @@ contained_in_symbol (symtab_node *node) if (cgraph_node *cnode = dyn_cast (node)) { cnode = cnode->function_symbol (); - if (cnode->global.inlined_to) - cnode = cnode->global.inlined_to; + if (cnode->inlined_to) + cnode = cnode->inlined_to; return cnode; } else if (varpool_node *vnode = dyn_cast (node)) @@ -291,7 +291,7 @@ undo_partition (ltrans_partition partiti if (!node->alias && (cnode = dyn_cast (node)) && node->get_partitioning_class () == SYMBOL_PARTITION) - partition->insns -= ipa_fn_summaries->get (cnode)->size; + partition->insns -= ipa_size_summaries->get (cnode)->size; lto_symtab_encoder_delete_node (partition->encoder, node); node->aux = (void *)((size_t)node->aux - 1); } @@ -529,7 +529,7 @@ lto_balanced_map (int n_lto_partitions, else order.safe_push (node); if (!node->alias) - total_size += ipa_fn_summaries->get (node)->size; + total_size += ipa_size_summaries->get (node)->size; } original_total_size = total_size; diff -Nurp a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c --- a/gcc/lto/lto-symtab.c 2020-04-30 15:14:04.664000000 +0800 +++ b/gcc/lto/lto-symtab.c 2020-04-30 15:14:56.592000000 +0800 @@ -63,7 +63,7 @@ lto_cgraph_replace_node (struct cgraph_n prevailing_node->forced_by_abi = true; if (node->address_taken) { - gcc_assert (!prevailing_node->global.inlined_to); + gcc_assert (!prevailing_node->inlined_to); prevailing_node->mark_address_taken (); } if (node->definition && prevailing_node->definition @@ -909,7 +909,7 @@ lto_symtab_merge_symbols_1 (symtab_node cgraph_node *ce = dyn_cast (e); if ((!TREE_PUBLIC (e->decl) && !DECL_EXTERNAL (e->decl)) - || (ce != NULL && ce->global.inlined_to)) + || (ce != NULL && ce->inlined_to)) continue; symtab_node *to = symtab_node::get (lto_symtab_prevailing_decl (e->decl)); diff -Nurp a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c --- a/gcc/lto-cgraph.c 2020-04-30 15:14:04.636000000 +0800 +++ b/gcc/lto-cgraph.c 2020-04-30 15:14:56.588000000 +0800 @@ -329,7 +329,7 @@ reachable_from_other_partition_p (struct struct cgraph_edge *e; if (!node->definition) return false; - if (node->global.inlined_to) + if (node->inlined_to) return false; for (e = node->callers; e; e = e->next_caller) { @@ -399,7 +399,7 @@ lto_output_node (struct lto_simple_outpu boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node); if (node->analyzed && (!boundary_p || node->alias - || (node->thunk.thunk_p && !node->global.inlined_to))) + || (node->thunk.thunk_p && !node->inlined_to))) tag = LTO_symtab_analyzed_node; else tag = LTO_symtab_unavail_node; @@ -422,7 +422,7 @@ lto_output_node (struct lto_simple_outpu && node->get_partitioning_class () == SYMBOL_PARTITION) { /* Inline clones cannot 
be part of boundary. - gcc_assert (!node->global.inlined_to); + gcc_assert (!node->inlined_to); FIXME: At the moment they can be, when partition contains an inline clone that is clone of inline clone from outside partition. We can @@ -468,9 +468,9 @@ lto_output_node (struct lto_simple_outpu if (tag == LTO_symtab_analyzed_node) { - if (node->global.inlined_to) + if (node->inlined_to) { - ref = lto_symtab_encoder_lookup (encoder, node->global.inlined_to); + ref = lto_symtab_encoder_lookup (encoder, node->inlined_to); gcc_assert (ref != LCC_NOT_FOUND); } else @@ -884,7 +884,7 @@ compute_ltrans_boundary (lto_symtab_enco if (!lto_symtab_encoder_in_partition_p (encoder, callee)) { /* We should have moved all the inlines. */ - gcc_assert (!callee->global.inlined_to); + gcc_assert (!callee->inlined_to); add_node_to (encoder, callee, false); } } @@ -911,7 +911,7 @@ compute_ltrans_boundary (lto_symtab_enco && !lto_symtab_encoder_in_partition_p (encoder, callee)) { - gcc_assert (!callee->global.inlined_to); + gcc_assert (!callee->inlined_to); add_node_to (encoder, callee, false); } } @@ -928,7 +928,7 @@ compute_ltrans_boundary (lto_symtab_enco if (node->alias && node->analyzed) create_references (encoder, node); if (cnode - && cnode->thunk.thunk_p && !cnode->global.inlined_to) + && cnode->thunk.thunk_p && !cnode->inlined_to) add_node_to (encoder, cnode->callees->callee, false); while (node->transparent_alias && node->analyzed) { @@ -984,7 +984,7 @@ output_symtab (void) { node = dyn_cast (lto_symtab_encoder_deref (encoder, i)); if (node - && ((node->thunk.thunk_p && !node->global.inlined_to) + && ((node->thunk.thunk_p && !node->inlined_to) || lto_symtab_encoder_in_partition_p (encoder, node))) { output_outgoing_cgraph_edges (node->callees, ob, encoder); @@ -1283,7 +1283,7 @@ input_node (struct lto_file_decl_data *f input_overwrite_node (file_data, node, tag, &bp); /* Store a reference for now, and fix up later to be a pointer. */ - node->global.inlined_to = (cgraph_node *) (intptr_t) ref; + node->inlined_to = (cgraph_node *) (intptr_t) ref; if (group) { @@ -1542,7 +1542,7 @@ input_cgraph_1 (struct lto_file_decl_dat int ref; if (cgraph_node *cnode = dyn_cast (node)) { - ref = (int) (intptr_t) cnode->global.inlined_to; + ref = (int) (intptr_t) cnode->inlined_to; /* We share declaration of builtins, so we may read same node twice. */ if (!node->aux) @@ -1551,10 +1551,10 @@ input_cgraph_1 (struct lto_file_decl_dat /* Fixup inlined_to from reference to pointer. 
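inlined_to is streamed as an index into the symtab encoder and only becomes a pointer again once every node exists; the hunk that follows performs that second phase. The usual two-phase scheme for serializing graph links, in miniature (a vector stands in for the encoder; names hypothetical):

  #include <cstdint>
  #include <vector>

  struct node
  {
    node *inlined_to;  /* while reading: an index smuggled in the pointer */
  };

  const intptr_t NOT_FOUND = -1;   /* plays the role of LCC_NOT_FOUND */

  /* Phase 1, like input_node: park the raw reference in the field.  */
  static void
  read_node (node &n, intptr_t ref)
  {
    n.inlined_to = (node *) ref;
  }

  /* Phase 2, like input_cgraph_1: patch indices into pointers.  */
  static void
  fixup (std::vector<node> &nodes)
  {
    for (node &n : nodes)
      {
        intptr_t ref = (intptr_t) n.inlined_to;
        n.inlined_to = ref != NOT_FOUND ? &nodes[ref] : nullptr;
      }
  }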
*/ if (ref != LCC_NOT_FOUND) - dyn_cast <cgraph_node *> (node)->global.inlined_to + dyn_cast <cgraph_node *> (node)->inlined_to = dyn_cast <cgraph_node *> (nodes[ref]); else - cnode->global.inlined_to = NULL; + cnode->inlined_to = NULL; } ref = (int) (intptr_t) node->same_comdat_group; diff -Nurp a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c --- a/gcc/omp-simd-clone.c 2020-04-30 15:14:04.644000000 +0800 +++ b/gcc/omp-simd-clone.c 2020-04-30 15:14:56.592000000 +0800 @@ -1635,7 +1635,7 @@ expand_simd_clones (struct cgraph_node * tree attr = lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)); if (attr == NULL_TREE - || node->global.inlined_to + || node->inlined_to || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl))) return; diff -Nurp a/gcc/params.def b/gcc/params.def --- a/gcc/params.def 2020-04-30 15:14:04.560000000 +0800 +++ b/gcc/params.def 2020-04-30 15:14:56.700000000 +0800 @@ -1093,6 +1093,18 @@ DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, "interprocedural constant propagation.", 8, 0, 0) +DEFPARAM (PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY, + "ipa-cp-min-recursive-probability", + "Recursive cloning only when the probability of call being executed " + "exceeds the parameter. ", + 2, 0, 0) + +DEFPARAM (PARAM_IPA_CP_MAX_RECURSIVE_DEPTH, + "ipa-cp-max-recursive-depth", + "Maximum depth of recursive cloning " + "for self-recursive function.", + 8, 0, 0) + DEFPARAM (PARAM_IPA_CP_EVAL_THRESHOLD, "ipa-cp-eval-threshold", "Threshold ipa-cp opportunity evaluation that is still considered " @@ -1129,6 +1141,18 @@ DEFPARAM (PARAM_IPA_MAX_AA_STEPS, "parameter analysis based on alias analysis in any given function.", 25000, 0, 0) +DEFPARAM (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS, + "ipa-max-switch-predicate-bounds", + "Maximal number of boundary endpoints of case ranges of switch " + "statement used during IPA function summary generation.", + 5, 0, 0) + +DEFPARAM (PARAM_IPA_MAX_PARAM_EXPR_OPS, + "ipa-max-param-expr-ops", + "Maximum number of operations in a parameter expression that can " + "be handled by IPA analysis.", + 10, 0, 0) + /* WHOPR partitioning configuration. */ DEFPARAM (PARAM_LTO_PARTITIONS, diff -Nurp a/gcc/passes.c b/gcc/passes.c --- a/gcc/passes.c 2020-04-30 15:14:04.632000000 +0800 +++ b/gcc/passes.c 2020-04-30 15:14:56.592000000 +0800 @@ -3047,7 +3047,7 @@ function_called_by_processed_nodes_p (vo continue; if (TREE_ASM_WRITTEN (e->caller->decl)) continue; - if (!e->caller->process && !e->caller->global.inlined_to) + if (!e->caller->process && !e->caller->inlined_to) break; } if (dump_file && e) diff -Nurp a/gcc/symtab.c b/gcc/symtab.c --- a/gcc/symtab.c 2020-04-30 15:14:04.636000000 +0800 +++ b/gcc/symtab.c 2020-04-30 15:14:56.592000000 +0800 @@ -1874,7 +1874,7 @@ symtab_node::get_partitioning_class (voi if (DECL_ABSTRACT_P (decl)) return SYMBOL_EXTERNAL; - if (cnode && cnode->global.inlined_to) + if (cnode && cnode->inlined_to) return SYMBOL_DUPLICATE; /* Transparent aliases are always duplicated. */ @@ -2274,7 +2274,7 @@ symtab_node::binds_to_current_def_p (sym return true; /* Inline clones always binds locally.
*/ - if (cnode && cnode->global.inlined_to) + if (cnode && cnode->inlined_to) return true; if (DECL_EXTERNAL (decl)) @@ -2286,7 +2286,7 @@ symtab_node::binds_to_current_def_p (sym { cgraph_node *cref = dyn_cast (ref); if (cref) - ref = cref->global.inlined_to; + ref = cref->inlined_to; } /* If this is a reference from symbol itself and there are no aliases, we diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/flatten.c b/gcc/testsuite/gcc.c-torture/compile/flatten.c --- a/gcc/testsuite/gcc.c-torture/compile/flatten.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.c-torture/compile/flatten.c 2020-04-30 15:14:56.684000000 +0800 @@ -0,0 +1,5 @@ +int you_shall_not_flatten_me () __attribute__ ((flatten)); +main() +{ + you_shall_not_flatten_me (); +} diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c --- a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 2020-04-30 15:14:56.696000000 +0800 @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */ + +int fn(); + +int data[100]; + +int recur_fn (int i) +{ + int j; + + if (i == 6) + { + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + fn(); + return 10; + } + + data[i] = i; + + for (j = 0; j < 100; j++) + recur_fn (i + 1); + + return i; +} + +int main () +{ + int i; + + for (i = 0; i < 100; i++) + recur_fn (1) + recur_fn (-5); + + return 1; +} + +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c --- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 2020-04-30 15:14:56.664000000 +0800 @@ -0,0 +1,78 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */ + +int data1; + +int callee1(int *v) +{ + if (*v < 2) + return 0; + else + { + int t = data1; + + data1 = *v; + *v = t; + + return 1; + } +} + +int __attribute__((pure)) callee2(int *v) +{ + if (*v < 2) + return 0; + else + { + data1 = v[0] + v[2]; + + return 1; + } +} + +int caller1(int c, int *r) +{ + int a = 1; + + if (c) + return callee1(&a); + else + { + *r = 2; + return callee1(r); + } +} + +int data2[200]; +int data3; + +int __attribute__((const)) gen_cond(int); + +int caller2(void) +{ + int i, j; + int sum = 0; + int a[8]; + + a[0] = 3; + for (i = 0; i < 100; i++) + { + if (gen_cond (i)) + continue; + + a[2] = 4; + for (j = 0; j < 100; j++) + { + data2[i + j] = (i ^ j) + data3; + + sum += callee2(a); + } + } + + return sum; +} + +/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 1" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 2" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 3" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "offset: 64, type: int, CONST: 4" 1 "cp" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c --- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 2020-04-30 15:14:56.664000000 +0800 @@ -0,0 +1,77 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details -fno-early-inlining" } */ +/* { dg-add-options bind_pic_locally } */ + +struct S +{ + 
int a, b, c; +}; + +void *blah(int, void *); + +#define foo_body(p)\ +{ \ + int i, c = (p)->c; \ + int b = (p)->b; \ + void *v = (void *) (p); \ + \ + for (i= 0; i< c; i++) \ + v = blah(b + i, v); \ +} + +static void __attribute__ ((noinline)) +foo_v (struct S s) +{ + foo_body (&s); +} + +static void __attribute__ ((noinline)) +foo_r (struct S *p) +{ + foo_body (p); +} + +static void +goo_v (int a, int *p) +{ + struct S s; + s.a = 101; + s.b = a % 7; + s.c = *p + 6; + foo_v (s); +} + +static void +goo_r (int a, struct S n) +{ + struct S s; + s.a = 1; + s.b = a + 5; + s.c = -n.b; + foo_r (&s); +} + +void +entry () +{ + int a; + int v; + struct S s; + + a = 9; + v = 3; + goo_v (a, &v); + + a = 100; + s.b = 18; + goo_r (a, s); +} + +/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 1" "cp" } } */ +/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op plus_expr 5" "cp" } } */ +/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 32, by value], op negate_expr" "cp" } } */ +/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 101" "cp" } } */ +/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op trunc_mod_expr 7" "cp" } } */ +/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 0, by reference], op plus_expr 6" "cp" } } */ +/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=1, 0\\\[32]=105, 0\\\[64]=-18" "cp" } } */ +/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=101, 0\\\[32]=2, 0\\\[64]=9" "cp" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/ipa/pr91089.c b/gcc/testsuite/gcc.dg/ipa/pr91089.c --- a/gcc/testsuite/gcc.dg/ipa/pr91089.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/ipa/pr91089.c 2020-04-30 15:14:56.516000000 +0800 @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details -fdump-ipa-fnsummary-details --param ipa-max-switch-predicate-bounds=10 -fno-inline" } */ + +int fn (); + +int data; + +int callee (int i) +{ + switch (i) + { + case -126: return i + 13; + case -127: return i + 5; + case -8: return i * i; + case 0: return i % 9; + case 5: + case 7: + case 6: return 3; + default: + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + fn (); + } + + return data += i; +} + +int caller () +{ + return callee (-127) + + callee (-126) + + callee (-8) + + callee (0) + + callee (5) + + callee (6) + + callee (7) + + callee (100); +} + +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of callee" 7 "cp" } } */ +/* { dg-final { scan-ipa-dump "op0 < -127" "fnsummary" } } */ +/* { dg-final { scan-ipa-dump "op0 > -126" "fnsummary" } } */ +/* { dg-final { scan-ipa-dump "op0 != -8" "fnsummary" } } */ +/* { dg-final { scan-ipa-dump "op0 != 0" "fnsummary" } } */ +/* { dg-final { scan-ipa-dump "op0 < 5" "fnsummary" } } */ +/* { dg-final { scan-ipa-dump "op0 > 7" "fnsummary" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 2020-04-30 15:14:05.756000000 +0800 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 2020-04-30 15:14:56.640000000 +0800 @@ -19,9 +19,12 @@ main() { /* Make sure we perform indirect inlining of one and two and optimize the result to a constant. 
*/ - if (print(one) != 3) - link_error (); - if (print(two) != 5) - link_error (); + for (int i = 0; i < 100; i++) + { + if (print(one) != 3) + link_error (); + if (print(two) != 5) + link_error (); + } return 0; } diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 2020-04-30 15:14:56.472000000 +0800 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2))); +int foo (int *a) +{ + a[0] = 1; + a[1] = 2; + v2si x = *(v2si *)a; + *(v2si *)&a[2] = x; + return a[3]; +} + +/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 2020-04-30 15:14:56.472000000 +0800 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); +int foo (int *a) +{ + a[2] = 2; + a[0] = 0; + a[1] = 1; + a[3] = 4; + v4si x = *(v4si *)a; + *(v4si *)&a[4] = x; + return a[4] + a[7]; +} + +/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 2020-04-30 15:14:56.472000000 +0800 @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); +int foo (int *a) +{ + __builtin_memset (a, 0, 2 * __SIZEOF_INT__); + a[2] = 2; + a[0] = 1; + a[3] = 3; + v4si x = *(v4si *)a; + *(v4si *)&a[4] = x; + return a[4] + a[5] + a[7]; +} + +/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ diff -Nurp a/gcc/tree-sra.c b/gcc/tree-sra.c --- a/gcc/tree-sra.c 2020-04-30 15:14:04.568000000 +0800 +++ b/gcc/tree-sra.c 2020-04-30 15:14:56.556000000 +0800 @@ -5488,7 +5488,7 @@ ipa_sra_preliminary_function_checks (str if ((DECL_ONE_ONLY (node->decl) || DECL_EXTERNAL (node->decl)) && ipa_fn_summaries->get (node) - && ipa_fn_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) + && ipa_size_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) { if (dump_file) fprintf (dump_file, "Function too big to be made truly local.\n"); diff -Nurp a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c --- a/gcc/tree-ssa-alias.c 2020-04-30 15:14:04.648000000 +0800 +++ b/gcc/tree-ssa-alias.c 2020-04-30 15:14:56.540000000 +0800 @@ -1822,14 +1822,16 @@ ref_maybe_used_by_call_p_1 (gcall *call, if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) { struct cgraph_node *node = cgraph_node::get (callee); - bitmap not_read; + bitmap read; + int id; /* FIXME: Callee can be an OMP builtin that does not have a call graph node yet. We should enforce that there are nodes for all decls in the IL and remove this check instead. 
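On the consumer side (tree-ssa-alias.c just above and below), each query now combines two tests: the static must be tracked at all (uid != -1) and must be absent from the callee's read or written set; the polarity is inverted relative to the old not_read/not_written query, and an untracked uid has to be treated conservatively. The decision, stripped of tree machinery (sketch only):

  #include <set>

  /* "May CALLEE read this static?" under the new representation:
     no uid or no summary means assume yes.  */
  static bool
  call_may_read_static_p (int uid, const std::set<int> *statics_read)
  {
    if (uid == -1 || statics_read == nullptr)
      return true;                  /* no information: stay conservative */
    return statics_read->count (uid) != 0;
  }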
*/ if (node - && (not_read = ipa_reference_get_not_read_global (node)) - && bitmap_bit_p (not_read, ipa_reference_var_uid (base))) + && (id = ipa_reference_var_uid (base)) != -1 + && (read = ipa_reference_get_read_global (node)) + && !bitmap_bit_p (read, id)) goto process_args; } @@ -2217,11 +2219,13 @@ call_may_clobber_ref_p_1 (gcall *call, a if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) { struct cgraph_node *node = cgraph_node::get (callee); - bitmap not_written; + bitmap written; + int id; if (node - && (not_written = ipa_reference_get_not_written_global (node)) - && bitmap_bit_p (not_written, ipa_reference_var_uid (base))) + && (id = ipa_reference_var_uid (base)) != -1 + && (written = ipa_reference_get_written_global (node)) + && !bitmap_bit_p (written, id)) return false; } diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c --- a/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:04.632000000 +0800 +++ b/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:56.480000000 +0800 @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. #include "config.h" #include "system.h" #include "coretypes.h" +#include "splay-tree.h" #include "backend.h" #include "rtl.h" #include "tree.h" @@ -361,6 +362,8 @@ static void init_vn_nary_op_from_stmt (v static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int, enum tree_code, tree, tree *); static tree vn_lookup_simplify_result (gimple_match_op *); +static vn_reference_t vn_reference_lookup_or_insert_for_pieces + (tree, alias_set_type, tree, vec, tree); /* Return whether there is value numbering information for a given SSA name. */ @@ -1676,20 +1679,245 @@ vn_reference_lookup_1 (vn_reference_t vr return NULL_TREE; } + +/* Partial definition tracking support. */ + +struct pd_range +{ + HOST_WIDE_INT offset; + HOST_WIDE_INT size; +}; + +struct pd_data +{ + tree rhs; + HOST_WIDE_INT offset; + HOST_WIDE_INT size; +}; + +/* Context for alias walking. */ + struct vn_walk_cb_data { vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_, - vn_lookup_kind vn_walk_kind_, bool tbaa_p_) + vn_lookup_kind vn_walk_kind_, bool tbaa_p_) : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_), - tbaa_p (tbaa_p_) - {} + tbaa_p (tbaa_p_), known_ranges (NULL) + {} + ~vn_walk_cb_data (); + void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT); vn_reference_t vr; tree *last_vuse_ptr; vn_lookup_kind vn_walk_kind; bool tbaa_p; + + /* The VDEFs of partial defs we come along. */ + auto_vec partial_defs; + /* The first defs range to avoid splay tree setup in most cases. */ + pd_range first_range; + tree first_vuse; + splay_tree known_ranges; + obstack ranges_obstack; }; +vn_walk_cb_data::~vn_walk_cb_data () +{ + if (known_ranges) + { + splay_tree_delete (known_ranges); + obstack_free (&ranges_obstack, NULL); + } +} + +/* pd_range splay-tree helpers. 
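push_partial_def, declared above and defined in the hunks that follow, tracks which byte ranges of the looked-up location are already covered: a new range is merged with overlapping or adjoining predecessors and successors in a splay tree, and the walk stops once a single node covers the whole read. The same merging strategy over std::map, using half-open byte intervals instead of GCC's offset/size pairs (illustrative, not the GCC data structure):

  #include <algorithm>
  #include <iterator>
  #include <map>

  struct covered_ranges
  {
    std::map<long, long> m;               /* start -> end (exclusive) */

    /* Insert [off, end), coalescing neighbours the way push_partial_def
       merges via splay_tree_predecessor/splay_tree_successor.  */
    void add (long off, long end)
    {
      auto it = m.upper_bound (off);
      if (it != m.begin () && std::prev (it)->second >= off)
        {
          --it;                           /* merge with the predecessor */
          off = it->first;
          end = std::max (end, it->second);
        }
      while (it != m.end () && it->first <= end)
        {
          end = std::max (end, it->second); /* swallow covered successors */
          it = m.erase (it);
        }
      m[off] = end;
    }

    /* The known_subrange_p check that triggers the final combination.  */
    bool covers (long off, long end) const
    {
      auto it = m.upper_bound (off);
      return it != m.begin ()
             && std::prev (it)->first <= off
             && std::prev (it)->second >= end;
    }
  };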
*/ + +static int +pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p) +{ + HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p; + HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p; + if (offset1 < offset2) + return -1; + else if (offset1 > offset2) + return 1; + return 0; +} + +static void * +pd_tree_alloc (int size, void *data_) +{ + vn_walk_cb_data *data = (vn_walk_cb_data *)data_; + return obstack_alloc (&data->ranges_obstack, size); +} + +static void +pd_tree_dealloc (void *, void *) +{ +} + +/* Push PD to the vector of partial definitions returning a + value when we are ready to combine things with VUSE and MAXSIZEI, + NULL when we want to continue looking for partial defs or -1 + on failure. */ + +void * +vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse, + HOST_WIDE_INT maxsizei) +{ + if (partial_defs.is_empty ()) + { + partial_defs.safe_push (pd); + first_range.offset = pd.offset; + first_range.size = pd.size; + first_vuse = vuse; + last_vuse_ptr = NULL; + } + else + { + if (!known_ranges) + { + /* ??? Optimize the case where the second partial def + completes things. */ + gcc_obstack_init (&ranges_obstack); + known_ranges + = splay_tree_new_with_allocator (pd_range_compare, 0, 0, + pd_tree_alloc, + pd_tree_dealloc, this); + splay_tree_insert (known_ranges, + (splay_tree_key)&first_range.offset, + (splay_tree_value)&first_range); + } + if (known_ranges) + { + pd_range newr = { pd.offset, pd.size }; + splay_tree_node n; + pd_range *r; + /* Lookup the predecessor of offset + 1 and see if + we need to merge with it. */ + HOST_WIDE_INT loffset = newr.offset + 1; + if ((n = splay_tree_predecessor (known_ranges, + (splay_tree_key)&loffset)) + && ((r = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + newr.offset, newr.size)) + { + /* Ignore partial defs already covered. */ + if (known_subrange_p (newr.offset, newr.size, + r->offset, r->size)) + return NULL; + r->size = MAX (r->offset + r->size, + newr.offset + newr.size) - r->offset; + } + else + { + /* newr.offset wasn't covered yet, insert the + range. */ + r = XOBNEW (&ranges_obstack, pd_range); + *r = newr; + splay_tree_insert (known_ranges, + (splay_tree_key)&r->offset, + (splay_tree_value)r); + } + /* Merge r which now contains newr and is a member + of the splay tree with adjacent overlapping ranges. */ + pd_range *rafter; + while ((n = splay_tree_successor (known_ranges, + (splay_tree_key)&r->offset)) + && ((rafter = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + rafter->offset, rafter->size)) + { + r->size = MAX (r->offset + r->size, + rafter->offset + rafter->size) - r->offset; + splay_tree_remove (known_ranges, + (splay_tree_key)&rafter->offset); + } + partial_defs.safe_push (pd); + + /* Now we have merged newr into the range tree. + When we have covered [offseti, sizei] then the + tree will contain exactly one node which has + the desired properties and it will be 'r'. */ + if (known_subrange_p (0, maxsizei / BITS_PER_UNIT, + r->offset, r->size)) + { + /* Now simply native encode all partial defs + in reverse order. */ + unsigned ndefs = partial_defs.length (); + /* We support up to 512-bit values (for V8DFmode). */ + unsigned char buffer[64]; + int len; + + while (!partial_defs.is_empty ()) + { + pd_data pd = partial_defs.pop (); + if (TREE_CODE (pd.rhs) == CONSTRUCTOR) + /* Empty CONSTRUCTOR. 
*/ + memset (buffer + MAX (0, pd.offset), + 0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size)); + else + { + len = native_encode_expr (pd.rhs, + buffer + MAX (0, pd.offset), + sizeof (buffer) - MAX (0, pd.offset), + MAX (0, -pd.offset)); + if (len <= 0 + || len < (pd.size - MAX (0, -pd.offset))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Failed to encode %u " + "partial definitions\n", ndefs); + return (void *)-1; + } + } + } + + tree type = vr->type; + /* Make sure to interpret in a type that has a range + covering the whole access size. */ + if (INTEGRAL_TYPE_P (vr->type) + && maxsizei != TYPE_PRECISION (vr->type)) + type = build_nonstandard_integer_type (maxsizei, + TYPE_UNSIGNED (type)); + tree val = native_interpret_expr (type, buffer, + maxsizei / BITS_PER_UNIT); + /* If we chop off bits because the types precision doesn't + match the memory access size this is ok when optimizing + reads but not when called from the DSE code during + elimination. */ + if (val + && type != vr->type) + { + if (! int_fits_type_p (val, vr->type)) + val = NULL_TREE; + else + val = fold_convert (vr->type, val); + } + + if (val) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Successfully combined %u " + "partial definitions\n", ndefs); + return vn_reference_lookup_or_insert_for_pieces + (first_vuse, + vr->set, vr->type, vr->operands, val); + } + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Failed to interpret %u " + "encoded partial definitions\n", ndefs); + return (void *)-1; + } + } + } + } + /* Continue looking for partial defs. */ + return NULL; +} + /* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_ with the current VUSE and performs the expression lookup. */ @@ -1701,6 +1929,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB vn_reference_s **slot; hashval_t hash; + /* If we have partial definitions recorded we have to go through + vn_reference_lookup_3. */ + if (!data->partial_defs.is_empty ()) + return NULL; + if (data->last_vuse_ptr) *data->last_vuse_ptr = vuse; @@ -1964,6 +2197,33 @@ public: static rpo_elim *rpo_avail; basic_block vn_context_bb; +/* Return true if BASE1 and BASE2 can be adjusted so they have the + same address and adjust *OFFSET1 and *OFFSET2 accordingly. + Otherwise return false. */ + +static bool +adjust_offsets_for_equal_base_address (tree base1, poly_int64 *offset1, + tree base2, poly_int64 *offset2) +{ + poly_int64 soff; + if (TREE_CODE (base1) == MEM_REF + && TREE_CODE (base2) == MEM_REF) + { + if (mem_ref_offset (base1).to_shwi (&soff)) + { + base1 = TREE_OPERAND (base1, 0); + *offset1 += soff * BITS_PER_UNIT; + } + if (mem_ref_offset (base2).to_shwi (&soff)) + { + base2 = TREE_OPERAND (base2, 0); + *offset2 += soff * BITS_PER_UNIT; + } + return operand_equal_p (base1, base2, 0); + } + return operand_equal_p (base1, base2, OEP_ADDRESS_OF); +} + /* Callback for walk_non_aliased_vuses.
Tries to perform a lookup from the statement defining VUSE and if not successful tries to translate *REFP and VR_ through an aggregate copy at the definition @@ -2175,8 +2435,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree else return (void *)-1; tree len = gimple_call_arg (def_stmt, 2); - if (known_subrange_p (offset, maxsize, offset2, - wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) + HOST_WIDE_INT leni, offset2i, offseti; + if (data->partial_defs.is_empty () + && known_subrange_p (offset, maxsize, offset2, + wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) { tree val; if (integer_zerop (gimple_call_arg (def_stmt, 1))) @@ -2205,6 +2467,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree return vn_reference_lookup_or_insert_for_pieces (vuse, vr->set, vr->type, vr->operands, val); } + /* For now handle clearing memory with partial defs. */ + else if (integer_zerop (gimple_call_arg (def_stmt, 1)) + && tree_to_poly_int64 (len).is_constant (&leni) + && offset.is_constant (&offseti) + && offset2.is_constant (&offset2i) + && maxsize.is_constant (&maxsizei)) + { + pd_data pd; + pd.rhs = build_constructor (NULL_TREE, NULL); + pd.offset = offset2i - offseti; + pd.size = leni; + return data->push_partial_def (pd, vuse, maxsizei); + } } /* 2) Assignment from an empty CONSTRUCTOR. */ @@ -2215,17 +2490,37 @@ vn_reference_lookup_3 (ao_ref *ref, tree { tree base2; poly_int64 offset2, size2, maxsize2; + HOST_WIDE_INT offset2i, size2i; bool reverse; base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), &offset2, &size2, &maxsize2, &reverse); if (known_size_p (maxsize2) && known_eq (maxsize2, size2) - && operand_equal_p (base, base2, 0) - && known_subrange_p (offset, maxsize, offset2, size2)) + && adjust_offsets_for_equal_base_address (base, &offset, + base2, &offset2)) { - tree val = build_zero_cst (vr->type); - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + if (data->partial_defs.is_empty () + && known_subrange_p (offset, maxsize, offset2, size2)) + { + tree val = build_zero_cst (vr->type); + return vn_reference_lookup_or_insert_for_pieces + (vuse, vr->set, vr->type, vr->operands, val); + } + else if (maxsize.is_constant (&maxsizei) + && maxsizei % BITS_PER_UNIT == 0 + && offset.is_constant (&offseti) + && offseti % BITS_PER_UNIT == 0 + && offset2.is_constant (&offset2i) + && offset2i % BITS_PER_UNIT == 0 + && size2.is_constant (&size2i) + && size2i % BITS_PER_UNIT == 0) + { + pd_data pd; + pd.rhs = gimple_assign_rhs1 (def_stmt); + pd.offset = (offset2i - offseti) / BITS_PER_UNIT; + pd.size = size2i / BITS_PER_UNIT; + return data->push_partial_def (pd, vuse, maxsizei); + } } } @@ -2247,65 +2542,85 @@ vn_reference_lookup_3 (ao_ref *ref, tree && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) { tree base2; - HOST_WIDE_INT offset2, size2; + poly_int64 offset2, size2, maxsize2; + HOST_WIDE_INT offset2i, size2i; bool reverse; - base2 = get_ref_base_and_extent_hwi (gimple_assign_lhs (def_stmt), - &offset2, &size2, &reverse); + base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), + &offset2, &size2, &maxsize2, &reverse); if (base2 && !reverse - && size2 % BITS_PER_UNIT == 0 - && offset2 % BITS_PER_UNIT == 0 - && operand_equal_p (base, base2, 0) - && known_subrange_p (offseti, maxsizei, offset2, size2)) - { - /* We support up to 512-bit values (for V8DFmode). 
*/ - unsigned char buffer[64]; - int len; - - tree rhs = gimple_assign_rhs1 (def_stmt); - if (TREE_CODE (rhs) == SSA_NAME) - rhs = SSA_VAL (rhs); - unsigned pad = 0; - if (BYTES_BIG_ENDIAN - && is_a (TYPE_MODE (TREE_TYPE (rhs)))) - { - /* On big-endian the padding is at the 'front' so - just skip the initial bytes. */ - fixed_size_mode mode - = as_a (TYPE_MODE (TREE_TYPE (rhs))); - pad = GET_MODE_SIZE (mode) - size2 / BITS_PER_UNIT; - } - len = native_encode_expr (rhs, - buffer, sizeof (buffer), - ((offseti - offset2) / BITS_PER_UNIT - + pad)); - if (len > 0 && len * BITS_PER_UNIT >= maxsizei) - { - tree type = vr->type; - /* Make sure to interpret in a type that has a range - covering the whole access size. */ - if (INTEGRAL_TYPE_P (vr->type) - && maxsizei != TYPE_PRECISION (vr->type)) - type = build_nonstandard_integer_type (maxsizei, - TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); - /* If we chop off bits because the types precision doesn't - match the memory access size this is ok when optimizing - reads but not when called from the DSE code during - elimination. */ - if (val - && type != vr->type) + && known_eq (maxsize2, size2) + && multiple_p (size2, BITS_PER_UNIT) + && multiple_p (offset2, BITS_PER_UNIT) + && adjust_offsets_for_equal_base_address (base, &offset, + base2, &offset2) + && offset.is_constant (&offseti) + && offset2.is_constant (&offset2i) + && size2.is_constant (&size2i)) + { + if (data->partial_defs.is_empty () + && known_subrange_p (offseti, maxsizei, offset2, size2)) + { + /* We support up to 512-bit values (for V8DFmode). */ + unsigned char buffer[64]; + int len; + + tree rhs = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = SSA_VAL (rhs); + unsigned pad = 0; + if (BYTES_BIG_ENDIAN + && is_a (TYPE_MODE (TREE_TYPE (rhs)))) { - if (! int_fits_type_p (val, vr->type)) - val = NULL_TREE; - else - val = fold_convert (vr->type, val); + /* On big-endian the padding is at the 'front' so + just skip the initial bytes. */ + fixed_size_mode mode + = as_a (TYPE_MODE (TREE_TYPE (rhs))); + pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; } - - if (val) - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + len = native_encode_expr (rhs, + buffer, sizeof (buffer), + ((offseti - offset2i) / BITS_PER_UNIT + + pad)); + if (len > 0 && len * BITS_PER_UNIT >= maxsizei) + { + tree type = vr->type; + /* Make sure to interpret in a type that has a range + covering the whole access size. */ + if (INTEGRAL_TYPE_P (vr->type) + && maxsizei != TYPE_PRECISION (vr->type)) + type = build_nonstandard_integer_type (maxsizei, + TYPE_UNSIGNED (type)); + tree val = native_interpret_expr (type, buffer, + maxsizei / BITS_PER_UNIT); + /* If we chop off bits because the types precision doesn't + match the memory access size this is ok when optimizing + reads but not when called from the DSE code during + elimination. */ + if (val + && type != vr->type) + { + if (! 
int_fits_type_p (val, vr->type)) + val = NULL_TREE; + else + val = fold_convert (vr->type, val); + } + + if (val) + return vn_reference_lookup_or_insert_for_pieces + (vuse, vr->set, vr->type, vr->operands, val); + } + } + else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) + { + pd_data pd; + tree rhs = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = SSA_VAL (rhs); + pd.rhs = rhs; + pd.offset = (offset2i - offseti) / BITS_PER_UNIT; + pd.size = size2i / BITS_PER_UNIT; + return data->push_partial_def (pd, vuse, maxsizei); } } } @@ -2316,7 +2631,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) - && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) + && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME + /* A subset of partial defs from non-constants can be handled + by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or + even a (series of) BIT_INSERT_EXPR hoping for simplifications + downstream, not so much for actually doing the insertion. */ + && data->partial_defs.is_empty ()) { tree base2; poly_int64 offset2, size2, maxsize2; @@ -2328,7 +2648,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree if (!reverse && known_size_p (maxsize2) && known_eq (maxsize2, size2) - && operand_equal_p (base, base2, 0) + && adjust_offsets_for_equal_base_address (base, &offset, + base2, &offset2) && known_subrange_p (offset, maxsize, offset2, size2) /* ??? We can't handle bitfield precision extracts without either using an alternate type for the BIT_FIELD_REF and @@ -2363,7 +2684,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree && gimple_assign_single_p (def_stmt) && (DECL_P (gimple_assign_rhs1 (def_stmt)) || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF - || handled_component_p (gimple_assign_rhs1 (def_stmt)))) + || handled_component_p (gimple_assign_rhs1 (def_stmt))) + /* Handling this is more complicated, give up for now. */ + && data->partial_defs.is_empty ()) { tree base2; int i, j, k; @@ -2497,7 +2820,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME) && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME) - && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)) + && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size) + /* Handling this is more complicated, give up for now. */ + && data->partial_defs.is_empty ()) { tree lhs, rhs; ao_ref r; diff -Nurp a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c --- a/gcc/tree-ssa-structalias.c 2020-04-30 15:14:04.644000000 +0800 +++ b/gcc/tree-ssa-structalias.c 2020-04-30 15:14:56.592000000 +0800 @@ -7817,7 +7817,7 @@ associate_varinfo_to_alias (struct cgrap { if ((node->alias || (node->thunk.thunk_p - && ! node->global.inlined_to)) + && ! node->inlined_to)) && node->analyzed && !node->ifunc_resolver) insert_vi_for_tree (node->decl, (varinfo_t)data); @@ -7987,7 +7987,7 @@ ipa_pta_execute (void) /* Nodes without a body are not interesting. Especially do not visit clones at this point for now - we get duplicate decls there for inline clones at least. */ - if (!node->has_gimple_body_p () || node->global.inlined_to) + if (!node->has_gimple_body_p () || node->inlined_to) continue; node->get_body ();
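For reference, the final combination step of push_partial_def above, in miniature: the defs were collected walking from the use backwards, so popping them in reverse order makes the stores found closest to the read overwrite bytes from older stores before the buffer is handed to native_interpret_expr. Raw bytes stand in for native_encode_expr; negative offsets and CONSTRUCTOR zeroing are omitted (sketch only):

  #include <algorithm>
  #include <cstring>
  #include <vector>

  struct pdef
  {
    long offset, size;           /* bytes, relative to the read */
    const unsigned char *bytes;  /* pre-encoded value of the store */
  };

  /* DEFS is in walk order, newest def first; iterate oldest-first so
     that newer stores win, like the pop () loop in push_partial_def.  */
  static void
  combine (const std::vector<pdef> &defs, unsigned char *buf, long bufsize)
  {
    for (auto it = defs.rbegin (); it != defs.rend (); ++it)
      std::memcpy (buf + it->offset, it->bytes,
                   std::min (it->size, bufsize - it->offset));
  }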