- Add add-checks-to-avoid-spoiling-if-conversion.patch
- Add add-option-fallow-store-data-races.patch
- Add complete-struct-reorg.patch
- Add cse-in-vectorization.patch
- Add enable-simd-math.patch
- Add fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch
- Add fix-ICE-in-compute_live_loop_exits.patch
- Add fix-ICE-in-copy_reference_ops_from_ref.patch
- Add fix-ICE-in-declare-return-variable.patch
- Add fix-ICE-in-exact_div.patch
- Add fix-ICE-in-gimple_op.patch
- Add fix-ICE-in-model_update_limit_points_in_group.patch
- Add fix-ICE-in-reload.patch
- Add fix-ICE-in-store_constructor.patch
- Add fix-ICE-in-vec.patch
- Add fix-ICE-in-vect_create_epilog_for_reduction.patch
- Add fix-ICE-in-vect_create_epilog_for_reduction_2.patch
- Add fix-ICE-in-vect_create_epilog_for_reduction_3.patch
- Add fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch
- Add fix-ICE-in-vect_slp_analyze_node_operations.patch
- Add fix-ICE-in-vect_stmt_to_vectorize.patch
- Add fix-ICE-in-vect_transform_stmt.patch
- Add fix-ICE-in-vectorizable_condition.patch
- Add fix-ICE-in-verify_ssa.patch
- Add fix-ICE-statement-uses-released-SSA-name.patch
- Add fix-ICE-when-vectorizing-nested-cycles.patch
- Add fix-SSA-update-for-vectorizer-epilogue.patch
- Add fix-do-not-build-op.patch
- Add fix-load-eliding-in-SM.patch
- Add fix-wrong-vectorizer-code.patch
- Add generate-csel-for-arrayref.patch
- Add ipa-const-prop-self-recursion-bugfix.patch
- Add ipa-const-prop.patch
- Add ipa-struct-reorg-bugfix.patch
- Add ipa-struct-reorg.patch
- Add medium-code-mode.patch
- Add reduction-chain-slp-option.patch
- Add reductions-slp-enhancement.patch
- Add simplify-removing-subregs.patch
- Add tighten-range-for-generating-csel.patch
- Add vectorization-enhancement.patch
This backport contains 128 patches from the upstream GCC mainline tree.

The upstream commit IDs of these patches are listed below, in chronological order:
0001-Aarch64-SVE-Dot-product-support.patch
9feeafd7f95ea9f7211908c137c60074b3a52da2
0002-tree-vect-stmts.c-get_group_load_store_type-Avoid-pe.patch
419c5f99876d9ee517f6b646dd785cdcaf5cb6fe
0003-re-PR-tree-optimization-90358-526.blender_r-train-ru.patch
898758504fa87d9f5e72c2c8b32139b413276a10
0004-tree-vect-slp.c-vect_build_slp_tree_2-Bump-size-when.patch
9f708a844853eb2fe87e696d27de14cbd68896f8
0005-cfgloop.h-struct-loop-Add-simdlen-member.patch
f63445e56c265757ebd50dc12fcd01773341b49f
0006-Current-vectoriser-doesn-t-support-masked-loads-for-.patch
997636716c5dde7d59d026726a6f58918069f122
0007-tree-vrp.h-value_range_base-nonzero_p-New.patch
f2b00d2ba461d6dafdeccf6d93828b349b5e7f76
0008-AArch64-PR-tree-optimization-90332-Implement-vec_ini.patch
41dab855dce20d5d7042c9330dd8124d0ece19c0
0009-Fix-a-thinko-in-tree-ssa-loop.c.patch
cc261f66c268107b120add99942d729b3a489452
0010-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
3fe0ddc88334f9afd622458653a6d103948994bd
0011-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
08c1638dab9becfafc65064891c1c59f5711c27f
0012-Remove-quite-obvious-dead-assignments.patch
45309d286c80ecad8b7a4efba0e9aba35d847af6
0013-Fix-various-issues-seen-with-clang-static-analyzer.patch
ef874db611879d5004e1d834543e55d31f2bfe1c
0014-re-PR-tree-optimization-91033-ICE-in-vect_analyze_lo.patch
a7b3509eb6aa51d696be5edba6f4e451ceff03a0
0015-re-PR-tree-optimization-91069-Miscompare-of-453.povr.patch
75da268e1a563a1a52389cd2ecee12d07c45a655
0016-tree-vrp.c-extract_range_from_multiplicative_op-Add-.patch
e2cfa983c31fa7886f496a47feb8714297ca0063
0017-re-PR-tree-optimization-91257-Compile-time-and-memor.patch
a55d6091230ae8d0d6f6c20dcc55158f6705090e
0018-re-PR-tree-optimization-91257-Compile-time-and-memor.patch
ce52e0ffb4f1ea7bd4fb99aea5dda75d260e438f
0019-Enforce-canonicalization-in-value_range.patch
c7cf3a9bb00b6d64ba0c0e0761f000758e9428a6
0020-tree-vectorizer.h-get_initial_def_for_reduction-Remo.patch
5fdd6038147e4ba30c8c01332dae8ab0d717bc14
0021-tree-parloops.c-report_ploop_op-Copy-from-report_vec.patch
31de92e39bbeffb9f1641d292e94b48f70809ae1
0022-tree-vect-loop.c-vect_is_simple_reduction-Remove-ope.patch
901083b9bdf69a7b1382f9682c6fd1d5759667dd
0023-Enforce-correct-COND_EXPR-order-for-EXTRACT_LAST_RED.patch
c449d3ae28ff4e133114fb67dbf7dcc7a95ca5d5
0024-tree-vect-loop.c-vect_is_slp_reduction-Remove.patch
b3c4d0dd309b7027f6e0f0b9a84829fcd53f7d64
0025-re-PR-tree-optimization-91822-FAIL-gcc.dg-pr88031.c-.patch
6e222b2a3aede20f3093802d1649e75848e3bd2b
0026-re-PR-target-91269-unaligned-floating-point-register.patch
d63eadac7db10d4846bdffa93fd164cb035fb102
0027-tree-vect-loop.c-get_initial_def_for_reduction-Simpl.patch
d469a71e5a0eb512b522248841c56496abca8cd6
0028-tree-vectorizer.h-_stmt_vec_info-const_cond_reduc_co.patch
a7701dd16103048432ec8051e4773760c0e2cf90
0029-re-PR-tree-optimization-91896-ICE-in-vect_get_vec_de.patch
fadb01364d36a50836201bc9a6a03e525d267967
0030-tree-vect-loop.c-vect_analyze_loop_operations-Also-c.patch
9593e8e5e391e77bb065d4689b7511bed6a640a3
0031-tree-vect-loop.c-vect_analyze_loop_operations-Analyz.patch
1b4dbccc1f828fa00e6acc8b88d24301c65552df
0032-Fix-reduc_index-1-handling-for-COND_REDUCTION-PR9190.patch
18908a56e18f15f84a91a4529923dd0878b2294f
0033-tree-vectorizer.h-_stmt_vec_info-reduc_fn-New.patch
29f26978866f32bddd656847441a3a953ffd7a21
0034-gimple.c-gimple_get_lhs-For-PHIs-return-the-result.patch
61362d9d18916bd5b694385982cf4a02b7537b0e
0035-tree-vect-loop.c-vectorizable_reduction-Move-variabl.patch
c7ea76ea5629e9f0357de49847274cf80e35f2f8
0036-tree-if-conv.c-tree_if_conversion-Move-call-to-ifcvt.patch
f30b3d2891cef9803badb3f85d739c0fcfafd585
0037-tree-vectorizer.h-stmt_vec_info_type-cycle_phi_info_.patch
291fa23ac04e317877c1e102937532f080180bb2
0038-re-PR-tree-optimization-91940-__builtin_bswap16-loop.patch
9ff9a0a5e6edd8729f559bf86ca06f781c4da246
0039-tree-vectorizer.h-vect_transform_reduction-Declare.patch
9f4d9a366b3299c276043ab987234c7bed7d29f2
0040-re-PR-target-91982-gcc.target-aarch64-sve-clastb_-.c.patch
48528394eafa9d1db9f956570f910c76d429a3e5
0041-re-PR-tree-optimization-91532-SVE-Redundant-predicat.patch
b238b34ea47222ffca7addc5fe4e8c052ade88b3
0042-tree-vectorizer.h-_stmt_vec_info-v_reduc_type-Remove.patch
69f8c1aef5cdcc54d5cb2ca4f99f4f26c2f822a9
0043-tree-vectorizer.h-_stmt_vec_info-reduc_vectype_in-Ne.patch
f78347996e02a8a767a525bfb764e769afe29d67
0044-tree-vect-loop.c-vect_is_simple_reduction-Simplify-a.patch
4a8841c0413d52261a8d024577381582d07a866a
0045-re-PR-tree-optimization-92069-ice-in-vect_analyze_sc.patch
7bd8bec53f0e43c7a7852c54650746e65324514b
0046-Deal-with-incoming-POLY_INT_CST-ranges-PR92033.patch
96eb7d7a642085f651e9940f0ee75568d7c4441d
0047-tree-vect-loop.c-vect_valid_reduction_input_p-Remove.patch
aab8c2fd6542a52663243eec160b80bdd61516d5
0048-tree-vect-loop.c-needs_fold_left_reduction_p-Export.patch
aa9dffac731d0359a0e7a925ff8f4a1bef182eac
0049-vect-Refactor-versioning-threshold.patch
a421fe9e610b5dbfce1913cd724c8ba193addd47
0050-vect-Outline-code-into-new-function-determine_peel_f.patch
31b35fd503e1c6713839db24044812d237aba5f1
0051-vect-Be-consistent-in-versioning-threshold-use.patch
f261d4808cc28a2dfd47fe06c97364c0869bb78f
0052-tree-vect-loop.c-check_reduction_path-Compute-reduct.patch
58baf7ab85cbb1068a651c96f7d56e2902ead6cc
0053-tree-vectorizer.h-_stmt_vec_info-cond_reduc_code-Rem.patch
c11cccc0285f02f117a1e80924fb7673b6486ce9
0054-re-PR-target-86753-gcc.target-aarch64-sve-vcond_-45-.patch
cc1facefe3b4e3b067d95291a7dba834b830ff18
0055-Avoid-recomputing-data-references-in-BB-SLP.patch
fa0c8df71d4f0476834db0b7cd88524878b46cf7
0056-Move-code-out-of-vect_slp_analyze_bb_1.patch
1d778697b37aec23db5b6003dfe08d2d78bd9424
0057-Avoid-setting-current_vector_size-in-get_vec_alignme.patch
da157e2ee9e12348df78246ee33b244b7cc334df
0058-Pass-a-vec_info-to-vect_supportable_shift.patch
a5c3185a503fbdbc1bf05efe8ab9d12850a211c1
0059-Pass-a-vec_info-to-vect_supportable_direct_optab_p.patch
dcab2a0d1d4b2c0b4bba6f5e3834ec0678a2a5c8
0060-Pass-a-vec_info-to-get_mask_type_for_scalar_type.patch
1bd5196c9b1a0cd7280adadd6d788f81a82ca023
0061-Pass-a-vec_info-to-get_vectype_for_scalar_type.patch
7ed54790da87bbb4a134020a9fb8bd1b72fd0acb
0062-Pass-a-vec_info-to-duplicate_and_interleave.patch
cdbe6e9bb4ae2882f77f94993783085fa342a9f9
0063-Pass-a-vec_info-to-can_duplicate_and_interleave_p.patch
43fdde5738ea0554fa000987e9769add027f4876
0064-Pass-a-vec_info-to-simple_integer_narrowing.patch
6c261c667801eee46a6221d3681d17493c0bbd65
0065-Pass-a-vec_info-to-supportable_narrowing_operation.patch
db8374a63fd0ea84f72ac76cc899be44df36df6a
0066-Pass-a-loop_vec_info-to-vect_maybe_permute_loop_mask.patch
b0dab10e71b03441beefbbf951c0812056413cd3
0067-Pass-a-vec_info-to-vect_halve_mask_nunits.patch
830e90dab3dee5c8129c7760ff09ab112c2cd271
0068-Pass-a-vec_info-to-vect_double_mask_nunits.patch
8d1473958808fe4714ec24991ac83ee6cbf45397
0069-Replace-current_vector_size-with-vec_info-vector_siz.patch
ba7f76dd6bbf038948bbe516764a8bb0c851f750
0070-tree-vectorizer.h-_slp_tree-ops-New-member.patch
30c0d1e3cf8b03992e08cfd00ccf1fcb638d3c03
0071-re-PR-tree-optimization-92162-ICE-in-vect_create_epi.patch
53b15ca96116544a7a3ca8bc5f4e1649b74f3d45
0072-Fix-use-after-free-in-vector_size-change.patch
87121696fb2ddbec5f33daa359234850f7fd306d
0073-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch
9107d6526b938eba8168025c0d90d06ad3634e69
0074-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch
6c7b0df8029d01e05577668333660d0bc58a3023
0075-AArch64-Don-t-apply-mode_for_int_vector-to-scalars.patch
d7814449f229cecdee48afe381519a61ea7e3378
0076-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
82e8e335f917b9ce40801838c06f7945cf88da43
0077-re-PR-tree-optimization-92205-ICE-in-vect_get_vec_de.patch
e227594789d909fbad56f6036910938678738f92
0078-tree-vect-slp.c-vect_get_and_check_slp_defs-For-redu.patch
4352288a3df915575a2b820f702242908740106f
0079-tree-vect-loop.c-vectorizable_reduction-Verify-STMT_.patch
ea133b14f48ed5730748a7e02e322fb07ccc2d85
0080-Fix-reductions-for-fully-masked-loops.patch
89d0345ad7b8d84045813972ee60557a6b511c57
0081-tree-vect-loop.c-vect_create_epilog_for_reduction-Us.patch
e0c4f7fbd6a4ee8e3a1468514044bd941fa28522
0082-re-PR-tree-optimization-92241-ice-in-vect_mark_patte.patch
97c6bea819ec0a773041308e62a7c05c33f093b0
0083-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
b7ff7cef5005721e78d6936bed3ae1c059b4e8d2
0084-Fix-reduc_index-calculation-in-vectorizable_conditio.patch
1d149b7260bcc4c0c6367b3aea47a8b91a1cf345
0085-vect-PR-88915-Vectorize-epilogues-when-versioning-lo.patch
97c146036750e7cb3966d292572ec158a78f356e
0086-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
b4673569c2a8b974e3f84ffaa547941c5d40cfe5
0087-Come-up-with-an-abstraction.patch
7f4a8ee03d404c560dcb75ba684fd57ffbc77e85
0088-re-PR-tree-optimization-92275-ICE-error-definition-i.patch
b81f2dafdbd2c5aa49213b35dc12d4610834e39e
0089-vect-Make-vect-epilogues-nomask-1-default.patch
1297712fb4af6c6bfd827e0f0a9695b14669f87d
0090-vect-Clean-up-orig_loop_vinfo-from-vect_analyze_loop.patch
494d6c28c53d0852bb6468b1f1ca189159775fcc
0091-re-PR-tree-optimization-92371-ICE-in-info_for_reduct.patch
02bf7e6fa219f939b3225c54fbe8bab2133b1aeb
0092-vect-PR92317-fix-skip_epilogue-creation-for-epilogue.patch
2e7a4f579b1157754ea20a03431b4fa80cd4567a
0093-Restructure-vect_analyze_loop.patch
72d6aeecd95ec49fff1d258e4631167a03351cbb
0094-Check-the-VF-is-small-enough-for-an-epilogue-loop.patch
8ec5b16a9a3dbd6d825596c22f1bc32646de28fe
0095-tree-vect-loop.c-vectorizable_reduction-Remember-red.patch
06af1f1a0def9de076ec629ea634122f15882ce6
0096-Don-t-vectorise-single-iteration-epilogues.patch
4b205bf82d06c4d9d0ae7b78e54c712d79d5b021
0097-re-PR-tree-optimization-92405-ICE-in-vect_get_vec_de.patch
084d390246c2172853f9e12ce04aef23cba79590
0098-re-PR-tree-optimization-92324-ICE-in-expand_direct_o.patch
f1e1ed3314b7c6308f64cbbcf6d1916e239c8e35
0099-vect-Disable-vectorization-of-epilogues-for-loops-wi.patch
b602712b3ea2a0729a2eda61bd9ee795aba6138f
0100-Use-correct-vector-type-in-neutral_op_for_slp_reduct.patch
d308ca27c71e43625b378dc6c2774105867d4fa7
0101-vect-Account-for-epilogue-s-peeling-for-gaps-when-ch.patch
87b47251924c7539a9a8e191587d118a14496473
0102-Add-a-targetm.vectorize.related_mode-hook.patch
f09552335030433018fd5f7f6b9848339b5ca2da
0103-Replace-mode_for_int_vector-with-related_int_vector_.patch
d083ee47a9828236016841356fc7207e7c90bbbd
0104-Add-build_truth_vector_type_for_mode.patch
0a0ef2387cc1561d537d8d949aef9479ef17ba35
0105-Remove-build_-same_sized_-truth_vector_type.patch
e8738f4e9686203451fd11f05b268b8a31b95ebd
0106-Pass-the-data-vector-mode-to-get_mask_mode.patch
10116ec1c147a76522cafba6b6a5b4ed1cb37b77
0107-Use-build_vector_type_for_mode-in-get_vectype_for_sc.patch
95da266b86fcdeff84fcadc5e3cde3d0027e571d
0108-Use-consistent-compatibility-checks-in-vectorizable_.patch
0203c4f3bfb3e3242635b0cee0b9deedb4070a62
0109-Use-consistent-compatibility-checks-in-vectorizable_.patch
e021fb865564b62a10adb1e98f75b5ea05058047
0110-Replace-vec_info-vector_size-with-vec_info-vector_mo.patch
1c84a2d25ecd4c03dde745f36a4762dd45f97c85
0111-Make-less-use-of-get_same_sized_vectype.patch
2df4150075c03f8a292c40afd3bb25febb673578
0112-Require-equal-type-sizes-for-vectorised-calls.patch
7f52eb891b738337d5cf82c7c440a5eea8c7b0c9
0113-Support-vectorisation-with-mixed-vector-sizes.patch
df7c22831f1e48dba49479c5960c1c180d8eab2c
0114-Avoid-retrying-with-the-same-vector-modes.patch
a55d8232df3dd4f7a3f5b70025074c3919b802a6
0115-AArch64-Support-vectorising-with-multiple-vector-siz.patch
74166aabeb7f22990476b1169bba031b8323ee92
0116-Allow-mixed-vector-sizes-within-a-single-vectorised-.patch
05101d1b575a57ca26e4275e971da85a0dd1d52a
0117-Vectorise-conversions-between-differently-sized-inte.patch
9c437a108a14b9bdc44659c131b0da944e5ffeab
0118-Consider-building-nodes-from-scalars-in-vect_slp_ana.patch
60838d634634a70d65a126166c944b159ac7649c
0119-Optionally-pick-the-cheapest-loop_vec_info.patch
bcc7e346bf9b5dc77797ea949d6adc740deb30ca
0120-Move-canonicalisation-of-dr_with_seg_len_pair_ts.patch
1fb2b0f69ee849142b669ba1b82264ce6d0f75f9
0121-Delay-swapping-data-refs-in-prune_runtime_alias_test.patch
97602450b04e94aff034381bf6ee4236b95727ed
0122-Add-flags-to-dr_with_seg_len_pair_t.patch
e9acf80c96d681917d930869b7cbfb7d2fa54d51
0123-Record-whether-a-dr_with_seg_len-contains-mixed-step.patch
52c29905259363ce2b78dd7aa8a25cf531cddb3a
0124-Dump-the-list-of-merged-alias-pairs.patch
cad984b289e2b3aca786314c673339eb0500fefa
0125-Print-the-type-of-alias-check-in-a-dump-message.patch
b4d1b635737a4780e5be247f8be9550eaf83dae5
0126-Use-a-single-comparison-for-index-based-alias-checks.patch
f9d6338bd15ce1fae36bf25d3a0545e9678ddc58
0127-Optimise-WAR-and-WAW-alias-checks.patch
8489e1f45b50600c01eb8ed8c5d0ca914ded281c
0128-Avoid-quadratic-behaviour-in-prune_runtime_alias_tes.patch
ea1ff9e46c7ec5e49ec671616cfcf405ef665054
diff --git a/gcc/asan.c b/gcc/asan.c
|
|
index 3b800b26b69..605d04f87f7 100644
|
|
--- a/gcc/asan.c
|
|
+++ b/gcc/asan.c
|
|
@@ -1713,8 +1713,8 @@ asan_emit_allocas_unpoison (rtx top, rtx bot, rtx_insn *before)
|
|
rtx ret = init_one_libfunc ("__asan_allocas_unpoison");
|
|
top = convert_memory_address (ptr_mode, top);
|
|
bot = convert_memory_address (ptr_mode, bot);
|
|
- ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode,
|
|
- top, ptr_mode, bot, ptr_mode);
|
|
+ emit_library_call (ret, LCT_NORMAL, ptr_mode,
|
|
+ top, ptr_mode, bot, ptr_mode);
|
|
|
|
do_pending_stack_adjust ();
|
|
rtx_insn *insns = get_insns ();
|
|
diff --git a/gcc/bt-load.c b/gcc/bt-load.c
|
|
index a7d9d53954e..f68879ca49a 100644
|
|
--- a/gcc/bt-load.c
|
|
+++ b/gcc/bt-load.c
|
|
@@ -1169,7 +1169,6 @@ move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range,
|
|
|
|
if (def->other_btr_uses_before_def)
|
|
{
|
|
- insp = BB_END (b);
|
|
for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp))
|
|
gcc_assert (insp != BB_HEAD (b));
|
|
|
|
diff --git a/gcc/builtins.c b/gcc/builtins.c
|
|
index ed11f79ff0b..910e614a4d1 100644
|
|
--- a/gcc/builtins.c
|
|
+++ b/gcc/builtins.c
|
|
@@ -1653,11 +1653,8 @@ expand_builtin_apply_args_1 (void)
|
|
/* Save the structure value address unless this is passed as an
|
|
"invisible" first argument. */
|
|
if (struct_incoming_value)
|
|
- {
|
|
- emit_move_insn (adjust_address (registers, Pmode, size),
|
|
- copy_to_reg (struct_incoming_value));
|
|
- size += GET_MODE_SIZE (Pmode);
|
|
- }
|
|
+ emit_move_insn (adjust_address (registers, Pmode, size),
|
|
+ copy_to_reg (struct_incoming_value));
|
|
|
|
/* Return the address of the block. */
|
|
return copy_addr_to_reg (XEXP (registers, 0));
|
|
@@ -1806,7 +1803,6 @@ expand_builtin_apply (rtx function, rtx arguments, rtx argsize)
|
|
emit_move_insn (struct_value, value);
|
|
if (REG_P (struct_value))
|
|
use_reg (&call_fusage, struct_value);
|
|
- size += GET_MODE_SIZE (Pmode);
|
|
}
|
|
|
|
/* All arguments and registers used for the call are set up by now! */
|
|
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
|
|
index c0582a54c93..cb999cbf82f 100644
|
|
--- a/gcc/c/c-typeck.c
|
|
+++ b/gcc/c/c-typeck.c
|
|
@@ -5424,7 +5424,7 @@ build_conditional_expr (location_t colon_loc, tree ifexp, bool ifexp_bcp,
|
|
tree elem_type = TREE_TYPE (vectype);
|
|
tree zero = build_int_cst (elem_type, 0);
|
|
tree zero_vec = build_vector_from_val (vectype, zero);
|
|
- tree cmp_type = build_same_sized_truth_vector_type (vectype);
|
|
+ tree cmp_type = truth_type_for (vectype);
|
|
ifexp = build2 (NE_EXPR, cmp_type, ifexp, zero_vec);
|
|
}
|
|
|
|
@@ -11327,7 +11327,7 @@ build_vec_cmp (tree_code code, tree type,
|
|
{
|
|
tree zero_vec = build_zero_cst (type);
|
|
tree minus_one_vec = build_minus_one_cst (type);
|
|
- tree cmp_type = build_same_sized_truth_vector_type (type);
|
|
+ tree cmp_type = truth_type_for (type);
|
|
tree cmp = build2 (code, cmp_type, arg0, arg1);
|
|
return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
|
|
}
|
|
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
|
|
index e252975f546..4ae8e3b3297 100644
|
|
--- a/gcc/cfgexpand.c
|
|
+++ b/gcc/cfgexpand.c
|
|
@@ -3029,7 +3029,6 @@ expand_asm_stmt (gasm *stmt)
|
|
}
|
|
}
|
|
}
|
|
- unsigned nclobbers = clobber_rvec.length();
|
|
|
|
/* First pass over inputs and outputs checks validity and sets
|
|
mark_addressable if needed. */
|
|
@@ -3301,7 +3300,7 @@ expand_asm_stmt (gasm *stmt)
|
|
gcc_assert (constraints.length() == noutputs + ninputs);
|
|
|
|
/* But it certainly can adjust the clobbers. */
|
|
- nclobbers = clobber_rvec.length();
|
|
+ unsigned nclobbers = clobber_rvec.length ();
|
|
|
|
/* Third pass checks for easy conflicts. */
|
|
/* ??? Why are we doing this on trees instead of rtx. */
|
|
@@ -5979,11 +5978,11 @@ construct_init_block (void)
|
|
{
|
|
first_block = e->dest;
|
|
redirect_edge_succ (e, init_block);
|
|
- e = make_single_succ_edge (init_block, first_block, flags);
|
|
+ make_single_succ_edge (init_block, first_block, flags);
|
|
}
|
|
else
|
|
- e = make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun),
|
|
- EDGE_FALLTHRU);
|
|
+ make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun),
|
|
+ EDGE_FALLTHRU);
|
|
|
|
update_bb_for_insn (init_block);
|
|
return init_block;
|
|
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
|
|
index a1d603a207e..a18b6490bdd 100644
|
|
--- a/gcc/cfghooks.c
|
|
+++ b/gcc/cfghooks.c
|
|
@@ -253,8 +253,6 @@ verify_flow_info (void)
|
|
err = 1;
|
|
}
|
|
|
|
- last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
-
|
|
/* Clean up. */
|
|
free (last_visited);
|
|
free (edge_checksum);
|
|
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
|
|
index b78d87d22f1..98bf6d2adda 100644
|
|
--- a/gcc/cfgloop.h
|
|
+++ b/gcc/cfgloop.h
|
|
@@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
|
|
of the loop can be safely evaluated concurrently. */
|
|
int safelen;
|
|
|
|
+ /* Preferred vectorization factor for the loop if non-zero. */
|
|
+ int simdlen;
|
|
+
|
|
/* Constraints are generally set by consumers and affect certain
|
|
semantics of niter analyzer APIs. Currently the APIs affected are
|
|
number_of_iterations_exit* functions and their callers. One typical
|
|
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
|
|
index ea4b914c15b..8fc697ecf5d 100644
|
|
--- a/gcc/cfgloopmanip.c
|
|
+++ b/gcc/cfgloopmanip.c
|
|
@@ -364,7 +364,6 @@ remove_path (edge e, bool *irred_invalidated,
|
|
|
|
for (i = 0; i < nrem; i++)
|
|
{
|
|
- bb = rem_bbs[i];
|
|
FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
|
|
if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
|
|
&& !bitmap_bit_p (seen, ae->dest->index))
|
|
@@ -1016,6 +1015,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
|
|
target->nb_iterations_estimate = loop->nb_iterations_estimate;
|
|
target->estimate_state = loop->estimate_state;
|
|
target->safelen = loop->safelen;
|
|
+ target->simdlen = loop->simdlen;
|
|
target->constraints = loop->constraints;
|
|
target->can_be_parallel = loop->can_be_parallel;
|
|
target->warned_aggressive_loop_optimizations
|
|
diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
|
|
index 08e534f2485..b5f15907bde 100644
|
|
--- a/gcc/cfgrtl.c
|
|
+++ b/gcc/cfgrtl.c
|
|
@@ -2958,7 +2958,6 @@ rtl_verify_bb_layout (void)
|
|
basic_block last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun), curr_bb = NULL;
|
|
|
|
num_bb_notes = 0;
|
|
- last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun);
|
|
|
|
for (x = rtx_first; x; x = NEXT_INSN (x))
|
|
{
|
|
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
|
|
index a16f4668b3c..bed6838d22b 100644
|
|
--- a/gcc/cgraph.c
|
|
+++ b/gcc/cgraph.c
|
|
@@ -2717,8 +2717,6 @@ bool
|
|
cgraph_node::set_pure_flag (bool pure, bool looping)
|
|
{
|
|
struct set_pure_flag_info info = {pure, looping, false};
|
|
- if (!pure)
|
|
- looping = false;
|
|
call_for_symbol_thunks_and_aliases (set_pure_flag_1, &info, !pure, true);
|
|
return info.changed;
|
|
}
|
|
diff --git a/gcc/combine.c b/gcc/combine.c
|
|
index 567aa2c3715..b9d674c96cc 100644
|
|
--- a/gcc/combine.c
|
|
+++ b/gcc/combine.c
|
|
@@ -6591,7 +6591,6 @@ simplify_if_then_else (rtx x)
|
|
|| reg_mentioned_p (true_rtx, false_rtx)
|
|
|| rtx_equal_p (false_rtx, XEXP (cond, 0))))
|
|
{
|
|
- true_code = reversed_comparison_code (cond, NULL);
|
|
SUBST (XEXP (x, 0), reversed_comparison (cond, GET_MODE (cond)));
|
|
SUBST (XEXP (x, 1), false_rtx);
|
|
SUBST (XEXP (x, 2), true_rtx);
|
|
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
|
index e3852c5d182..28f93a70801 100644
|
|
--- a/gcc/config/aarch64/aarch64-simd.md
|
|
+++ b/gcc/config/aarch64/aarch64-simd.md
|
|
@@ -3183,7 +3183,7 @@
|
|
;; In this insn, operand 1 should be low, and operand 2 the high part of the
|
|
;; dest vector.
|
|
|
|
-(define_insn "*aarch64_combinez<mode>"
|
|
+(define_insn "@aarch64_combinez<mode>"
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
|
(vec_concat:<VDBL>
|
|
(match_operand:VDC 1 "general_operand" "w,?r,m")
|
|
@@ -3197,7 +3197,7 @@
|
|
(set_attr "arch" "simd,fp,simd")]
|
|
)
|
|
|
|
-(define_insn "*aarch64_combinez_be<mode>"
|
|
+(define_insn "@aarch64_combinez_be<mode>"
|
|
[(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
|
|
(vec_concat:<VDBL>
|
|
(match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
|
|
@@ -5926,6 +5926,15 @@
|
|
DONE;
|
|
})
|
|
|
|
+(define_expand "vec_init<mode><Vhalf>"
|
|
+ [(match_operand:VQ_NO2E 0 "register_operand" "")
|
|
+ (match_operand 1 "" "")]
|
|
+ "TARGET_SIMD"
|
|
+{
|
|
+ aarch64_expand_vector_init (operands[0], operands[1]);
|
|
+ DONE;
|
|
+})
|
|
+
|
|
(define_insn "*aarch64_simd_ld1r<mode>"
|
|
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
|
|
(vec_duplicate:VALL_F16
|
|
@@ -6937,3 +6946,21 @@
|
|
"pmull2\\t%0.1q, %1.2d, %2.2d"
|
|
[(set_attr "type" "crypto_pmull")]
|
|
)
|
|
+
|
|
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
|
|
+(define_insn "<optab><Vnarrowq><mode>2"
|
|
+ [(set (match_operand:VQN 0 "register_operand" "=w")
|
|
+ (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
|
|
+ "TARGET_SIMD"
|
|
+ "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
|
|
+ [(set_attr "type" "neon_shift_imm_long")]
|
|
+)
|
|
+
|
|
+;; Truncate a 128-bit integer vector to a 64-bit vector.
|
|
+(define_insn "trunc<mode><Vnarrowq>2"
|
|
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
|
|
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
|
|
+ "TARGET_SIMD"
|
|
+ "xtn\t%0.<Vntype>, %1.<Vtype>"
|
|
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
|
|
+)
|
|
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
|
|
index 3f39c4c5b63..02d33b7276f 100644
|
|
--- a/gcc/config/aarch64/aarch64-sve.md
|
|
+++ b/gcc/config/aarch64/aarch64-sve.md
|
|
@@ -3132,3 +3132,19 @@
|
|
DONE;
|
|
}
|
|
)
|
|
+
|
|
+;; Unpredicated DOT product.
|
|
+(define_insn "<sur>dot_prod<vsi2qi>"
|
|
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
|
|
+ (plus:SVE_SDI
|
|
+ (unspec:SVE_SDI
|
|
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
|
|
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
|
|
+ DOTPROD)
|
|
+ (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
|
|
+ "TARGET_SVE"
|
|
+ "@
|
|
+ <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
|
|
+ movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
|
|
+ [(set_attr "movprfx" "*,yes")]
|
|
+)
|
|
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
index 2ff0bc0a686..128c250dffe 100644
|
|
--- a/gcc/config/aarch64/aarch64.c
|
|
+++ b/gcc/config/aarch64/aarch64.c
|
|
@@ -1549,17 +1549,37 @@ aarch64_sve_pred_mode (unsigned int elem_nbytes)
|
|
/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
|
|
|
|
static opt_machine_mode
|
|
-aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
|
|
+aarch64_get_mask_mode (machine_mode mode)
|
|
{
|
|
- if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
|
|
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
|
|
+ if (vec_flags & VEC_SVE_DATA)
|
|
+ return aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode));
|
|
+
|
|
+ return default_get_mask_mode (mode);
|
|
+}
|
|
+
|
|
+/* Implement TARGET_VECTORIZE_RELATED_MODE. */
|
|
+
|
|
+static opt_machine_mode
|
|
+aarch64_vectorize_related_mode (machine_mode vector_mode,
|
|
+ scalar_mode element_mode,
|
|
+ poly_uint64 nunits)
|
|
+{
|
|
+ unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode);
|
|
+
|
|
+ /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
|
|
+ if ((vec_flags & VEC_ADVSIMD)
|
|
+ && known_eq (nunits, 0U)
|
|
+ && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
|
|
+ && maybe_ge (GET_MODE_BITSIZE (element_mode)
|
|
+ * GET_MODE_NUNITS (vector_mode), 128U))
|
|
{
|
|
- unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
|
|
- machine_mode pred_mode;
|
|
- if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
|
|
- return pred_mode;
|
|
+ machine_mode res = aarch64_simd_container_mode (element_mode, 128);
|
|
+ if (VECTOR_MODE_P (res))
|
|
+ return res;
|
|
}
|
|
|
|
- return default_get_mask_mode (nunits, nbytes);
|
|
+ return default_vectorize_related_mode (vector_mode, element_mode, nunits);
|
|
}
|
|
|
|
/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
|
|
@@ -10897,7 +10917,9 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
|
|
/* Caller assumes we cannot fail. */
|
|
gcc_assert (use_rsqrt_p (mode));
|
|
|
|
- machine_mode mmsk = mode_for_int_vector (mode).require ();
|
|
+ machine_mode mmsk = (VECTOR_MODE_P (mode)
|
|
+ ? related_int_vector_mode (mode).require ()
|
|
+ : int_mode_for_mode (mode).require ());
|
|
rtx xmsk = gen_reg_rtx (mmsk);
|
|
if (!recp)
|
|
/* When calculating the approximate square root, compare the
|
|
@@ -14226,13 +14248,34 @@ aarch64_preferred_simd_mode (scalar_mode mode)
|
|
|
|
/* Return a list of possible vector sizes for the vectorizer
|
|
to iterate over. */
|
|
-static void
|
|
-aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
|
|
+static unsigned int
|
|
+aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
|
|
{
|
|
if (TARGET_SVE)
|
|
- sizes->safe_push (BYTES_PER_SVE_VECTOR);
|
|
- sizes->safe_push (16);
|
|
- sizes->safe_push (8);
|
|
+ modes->safe_push (VNx16QImode);
|
|
+
|
|
+ /* Try using 128-bit vectors for all element types. */
|
|
+ modes->safe_push (V16QImode);
|
|
+
|
|
+ /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
|
|
+ for wider elements. */
|
|
+ modes->safe_push (V8QImode);
|
|
+
|
|
+ /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
|
|
+ for wider elements.
|
|
+
|
|
+ TODO: We could support a limited form of V4QImode too, so that
|
|
+ we use 32-bit vectors for 8-bit elements. */
|
|
+ modes->safe_push (V4HImode);
|
|
+
|
|
+ /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
|
|
+ for 64-bit elements.
|
|
+
|
|
+ TODO: We could similarly support limited forms of V2QImode and V2HImode
|
|
+ for this case. */
|
|
+ modes->safe_push (V2SImode);
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
/* Implement TARGET_MANGLE_TYPE. */
|
|
@@ -15191,6 +15234,45 @@ aarch64_expand_vector_init (rtx target, rtx vals)
|
|
rtx v0 = XVECEXP (vals, 0, 0);
|
|
bool all_same = true;
|
|
|
|
+ /* This is a special vec_init<M><N> where N is not an element mode but a
|
|
+ vector mode with half the elements of M. We expect to find two entries
|
|
+ of mode N in VALS and we must put their concatentation into TARGET. */
|
|
+ if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
|
|
+ {
|
|
+ gcc_assert (known_eq (GET_MODE_SIZE (mode),
|
|
+ 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
|
|
+ rtx lo = XVECEXP (vals, 0, 0);
|
|
+ rtx hi = XVECEXP (vals, 0, 1);
|
|
+ machine_mode narrow_mode = GET_MODE (lo);
|
|
+ gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
|
|
+ gcc_assert (narrow_mode == GET_MODE (hi));
|
|
+
|
|
+ /* When we want to concatenate a half-width vector with zeroes we can
|
|
+ use the aarch64_combinez[_be] patterns. Just make sure that the
|
|
+ zeroes are in the right half. */
|
|
+ if (BYTES_BIG_ENDIAN
|
|
+ && aarch64_simd_imm_zero (lo, narrow_mode)
|
|
+ && general_operand (hi, narrow_mode))
|
|
+ emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
|
|
+ else if (!BYTES_BIG_ENDIAN
|
|
+ && aarch64_simd_imm_zero (hi, narrow_mode)
|
|
+ && general_operand (lo, narrow_mode))
|
|
+ emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
|
|
+ else
|
|
+ {
|
|
+ /* Else create the two half-width registers and combine them. */
|
|
+ if (!REG_P (lo))
|
|
+ lo = force_reg (GET_MODE (lo), lo);
|
|
+ if (!REG_P (hi))
|
|
+ hi = force_reg (GET_MODE (hi), hi);
|
|
+
|
|
+ if (BYTES_BIG_ENDIAN)
|
|
+ std::swap (lo, hi);
|
|
+ emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+
|
|
/* Count the number of variable elements to initialise. */
|
|
for (int i = 0; i < n_elts; ++i)
|
|
{
|
|
@@ -16684,7 +16766,7 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
|
|
if (d->testing_p)
|
|
return true;
|
|
|
|
- machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
|
|
+ machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
|
|
rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
|
|
if (d->one_vector_p)
|
|
emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
|
|
@@ -17064,9 +17146,7 @@ void
|
|
aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
|
|
rtx *ops)
|
|
{
|
|
- machine_mode pred_mode
|
|
- = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
|
|
- GET_MODE_SIZE (cmp_mode)).require ();
|
|
+ machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require ();
|
|
rtx pred = gen_reg_rtx (pred_mode);
|
|
if (FLOAT_MODE_P (cmp_mode))
|
|
{
|
|
@@ -19363,9 +19443,9 @@ aarch64_libgcc_floating_mode_supported_p
|
|
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
|
|
aarch64_builtin_vectorized_function
|
|
|
|
-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
|
- aarch64_autovectorize_vector_sizes
|
|
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
|
|
+ aarch64_autovectorize_vector_modes
|
|
|
|
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
|
|
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
|
|
@@ -19398,6 +19478,8 @@ aarch64_libgcc_floating_mode_supported_p
|
|
#define TARGET_VECTORIZE_VEC_PERM_CONST \
|
|
aarch64_vectorize_vec_perm_const
|
|
|
|
+#undef TARGET_VECTORIZE_RELATED_MODE
|
|
+#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
|
|
#undef TARGET_VECTORIZE_GET_MASK_MODE
|
|
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
|
|
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
|
|
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
|
index 6caeeac8086..c7ccd5bf6fe 100644
|
|
--- a/gcc/config/aarch64/iterators.md
|
|
+++ b/gcc/config/aarch64/iterators.md
|
|
@@ -663,6 +663,9 @@
|
|
(QI "b") (HI "h")
|
|
(SI "s") (DI "d")])
|
|
|
|
+;; Like Vetype, but map to types that are a quarter of the element size.
|
|
+(define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")])
|
|
+
|
|
;; Equivalent of "size" for a vector element.
|
|
(define_mode_attr Vesize [(VNx16QI "b")
|
|
(VNx8HI "h") (VNx8HF "h")
|
|
@@ -765,6 +768,7 @@
|
|
;; Half modes of all vector modes, in lower-case.
|
|
(define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
|
|
(V4HI "v2hi") (V8HI "v4hi")
|
|
+ (V8HF "v4hf")
|
|
(V2SI "si") (V4SI "v2si")
|
|
(V2DI "di") (V2SF "sf")
|
|
(V4SF "v2sf") (V2DF "df")])
|
|
@@ -800,6 +804,8 @@
|
|
(V2DI "V2SI")
|
|
(DI "SI") (SI "HI")
|
|
(HI "QI")])
|
|
+(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
|
|
+ (V2DI "v2si")])
|
|
|
|
;; Narrowed quad-modes for VQN (Used for XTN2).
|
|
(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
|
|
@@ -1029,8 +1035,10 @@
|
|
(V2SF "p") (V4SF "v")
|
|
(V4HF "v") (V8HF "v")])
|
|
|
|
-(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
|
|
-(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
|
|
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
|
|
+ (VNx4SI "vnx16qi") (VNx2DI "vnx8hi")])
|
|
+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")
|
|
+ (VNx4SI "VNx16QI") (VNx2DI "VNx8HI")])
|
|
|
|
|
|
;; Register suffix for DOTPROD input types from the return type.
|
|
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
|
|
index f7ff95a0edf..325dd3cea9a 100644
|
|
--- a/gcc/config/arc/arc.c
|
|
+++ b/gcc/config/arc/arc.c
|
|
@@ -477,16 +477,17 @@ arc_preferred_simd_mode (scalar_mode mode)
|
|
}
|
|
|
|
/* Implements target hook
|
|
- TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
|
|
+ TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
|
|
|
|
-static void
|
|
-arc_autovectorize_vector_sizes (vector_sizes *sizes)
|
|
+static unsigned int
|
|
+arc_autovectorize_vector_modes (vector_modes *modes, bool)
|
|
{
|
|
if (TARGET_PLUS_QMACW)
|
|
{
|
|
- sizes->quick_push (8);
|
|
- sizes->quick_push (4);
|
|
+ modes->quick_push (V4HImode);
|
|
+ modes->quick_push (V2HImode);
|
|
}
|
|
+ return 0;
|
|
}
|
|
|
|
|
|
@@ -596,8 +597,8 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
|
|
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
|
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
|
|
|
|
-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
|
|
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES arc_autovectorize_vector_modes
|
|
|
|
#undef TARGET_CAN_USE_DOLOOP_P
|
|
#define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
|
|
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
|
|
index cdfc0f9e72f..1a4a4b7bc58 100644
|
|
--- a/gcc/config/arm/arm.c
|
|
+++ b/gcc/config/arm/arm.c
|
|
@@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
|
|
static void arm_conditional_register_usage (void);
|
|
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
|
|
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
|
|
-static void arm_autovectorize_vector_sizes (vector_sizes *);
|
|
+static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
|
|
static int arm_default_branch_cost (bool, bool);
|
|
static int arm_cortex_a5_branch_cost (bool, bool);
|
|
static int arm_cortex_m_branch_cost (bool, bool);
|
|
@@ -519,9 +519,9 @@ static const struct attribute_spec arm_attribute_table[] =
|
|
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
|
|
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
|
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
|
|
-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
|
- arm_autovectorize_vector_sizes
|
|
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
|
|
+ arm_autovectorize_vector_modes
|
|
|
|
#undef TARGET_MACHINE_DEPENDENT_REORG
|
|
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
|
|
@@ -28446,14 +28446,15 @@ arm_vector_alignment (const_tree type)
|
|
return align;
|
|
}
|
|
|
|
-static void
|
|
-arm_autovectorize_vector_sizes (vector_sizes *sizes)
|
|
+static unsigned int
|
|
+arm_autovectorize_vector_modes (vector_modes *modes, bool)
|
|
{
|
|
if (!TARGET_NEON_VECTORIZE_DOUBLE)
|
|
{
|
|
- sizes->safe_push (16);
|
|
- sizes->safe_push (8);
|
|
+ modes->safe_push (V16QImode);
|
|
+ modes->safe_push (V8QImode);
|
|
}
|
|
+ return 0;
|
|
}
|
|
|
|
static bool
|
|
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
|
|
index 99fa45edcd4..eb06ff9e05b 100644
|
|
--- a/gcc/config/gcn/gcn.c
|
|
+++ b/gcc/config/gcn/gcn.c
|
|
@@ -3800,8 +3800,7 @@ gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
|
a vector. */
|
|
|
|
opt_machine_mode
|
|
-gcn_vectorize_get_mask_mode (poly_uint64 ARG_UNUSED (nunits),
|
|
- poly_uint64 ARG_UNUSED (length))
|
|
+gcn_vectorize_get_mask_mode (machine_mode)
|
|
{
|
|
/* GCN uses a DImode bit-mask. */
|
|
return DImode;
|
|
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
|
|
index 1bca5a7eea6..5a0f8a0eb72 100644
|
|
--- a/gcc/config/i386/i386.c
|
|
+++ b/gcc/config/i386/i386.c
|
|
@@ -9647,7 +9647,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
|
|
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
|
|
CUMULATIVE_ARGS next_cum;
|
|
tree fntype;
|
|
- int max;
|
|
|
|
gcc_assert (!no_rtl);
|
|
|
|
@@ -9663,10 +9662,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
|
|
if (stdarg_p (fntype))
|
|
ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
|
|
true);
|
|
-
|
|
- max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
|
|
- if (max > X86_64_REGPARM_MAX)
|
|
- max = X86_64_REGPARM_MAX;
|
|
}
|
|
|
|
|
|
@@ -11806,7 +11801,6 @@ choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
|
|
{
|
|
base_reg = hard_frame_pointer_rtx;
|
|
base_offset = toffset;
|
|
- len = tlen;
|
|
}
|
|
}
|
|
}
|
|
@@ -39699,12 +39693,10 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
|
|
static reg_class_t
|
|
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
|
|
{
|
|
- machine_mode mode = GET_MODE (x);
|
|
-
|
|
/* Restrict the output reload class to the register bank that we are doing
|
|
math on. If we would like not to return a subset of CLASS, reject this
|
|
alternative: if reload cannot do this, it will still use its choice. */
|
|
- mode = GET_MODE (x);
|
|
+ machine_mode mode = GET_MODE (x);
|
|
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
|
|
return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
|
|
|
|
@@ -45666,14 +45658,13 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
|
|
0, OPTAB_DIRECT);
|
|
|
|
/* Compensate. */
|
|
- tmp = gen_reg_rtx (mode);
|
|
/* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
|
|
tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
|
|
- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
|
|
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one)));
|
|
xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
|
|
/* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
|
|
tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
|
|
- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
|
|
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one)));
|
|
xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
|
|
|
|
/* res = copysign (xa2, operand1) */
|
|
@@ -50238,27 +50229,42 @@ ix86_split_reduction (machine_mode mode)
|
|
vectors. If AVX512F is enabled then try vectorizing with 512bit,
|
|
256bit and 128bit vectors. */
|
|
|
|
-static void
|
|
-ix86_autovectorize_vector_sizes (vector_sizes *sizes)
|
|
+static unsigned int
|
|
+ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
|
|
{
|
|
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
|
|
{
|
|
- sizes->safe_push (64);
|
|
- sizes->safe_push (32);
|
|
- sizes->safe_push (16);
|
|
+ modes->safe_push (V64QImode);
|
|
+ modes->safe_push (V32QImode);
|
|
+ modes->safe_push (V16QImode);
|
|
+ }
|
|
+ else if (TARGET_AVX512F && all)
|
|
+ {
|
|
+ modes->safe_push (V32QImode);
|
|
+ modes->safe_push (V16QImode);
|
|
+ modes->safe_push (V64QImode);
|
|
}
|
|
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
|
|
{
|
|
- sizes->safe_push (32);
|
|
- sizes->safe_push (16);
|
|
+ modes->safe_push (V32QImode);
|
|
+ modes->safe_push (V16QImode);
|
|
+ }
|
|
+ else if (TARGET_AVX && all)
|
|
+ {
|
|
+ modes->safe_push (V16QImode);
|
|
+ modes->safe_push (V32QImode);
|
|
}
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
/* Implemenation of targetm.vectorize.get_mask_mode. */
|
|
|
|
static opt_machine_mode
|
|
-ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
|
|
+ix86_get_mask_mode (machine_mode data_mode)
|
|
{
|
|
+ unsigned vector_size = GET_MODE_SIZE (data_mode);
|
|
+ unsigned nunits = GET_MODE_NUNITS (data_mode);
|
|
unsigned elem_size = vector_size / nunits;
|
|
|
|
/* Scalar mask case. */
|
|
@@ -51849,9 +51855,9 @@ ix86_run_selftests (void)
|
|
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
|
|
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
|
|
ix86_split_reduction
|
|
-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
|
- ix86_autovectorize_vector_sizes
|
|
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
|
|
+ ix86_autovectorize_vector_modes
|
|
#undef TARGET_VECTORIZE_GET_MASK_MODE
|
|
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
|
|
#undef TARGET_VECTORIZE_INIT_COST
|
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
|
index 18cc39ae521..8c961f12a42 100644
|
|
--- a/gcc/config/i386/sse.md
|
|
+++ b/gcc/config/i386/sse.md
|
|
@@ -16441,10 +16441,9 @@
|
|
(unspec:VF_128_256
|
|
[(match_operand:VF_128_256 1 "register_operand" "0,0,x")
|
|
(match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
|
|
- (subreg:VF_128_256
|
|
- (lt:<sseintvecmode>
|
|
- (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
|
|
- (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
|
|
+ (lt:VF_128_256
|
|
+ (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
|
|
+ (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
|
|
UNSPEC_BLENDV))]
|
|
"TARGET_SSE4_1"
|
|
"#"
|
|
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
|
|
index d758fbf1be6..1008947209e 100644
|
|
--- a/gcc/config/mips/mips.c
|
|
+++ b/gcc/config/mips/mips.c
|
|
@@ -13457,13 +13457,14 @@ mips_preferred_simd_mode (scalar_mode mode)
|
|
return word_mode;
|
|
}
|
|
|
|
-/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
|
|
+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
|
|
|
|
-static void
|
|
-mips_autovectorize_vector_sizes (vector_sizes *sizes)
|
|
+static unsigned int
|
|
+mips_autovectorize_vector_modes (vector_modes *modes, bool)
|
|
{
|
|
if (ISA_HAS_MSA)
|
|
- sizes->safe_push (16);
|
|
+ modes->safe_push (V16QImode);
|
|
+ return 0;
|
|
}
|
|
|
|
/* Implement TARGET_INIT_LIBFUNCS. */
|
|
@@ -22676,9 +22677,9 @@ mips_starting_frame_offset (void)
|
|
|
|
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
|
|
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode
|
|
-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
|
|
- mips_autovectorize_vector_sizes
|
|
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
|
|
+ mips_autovectorize_vector_modes
|
|
|
|
#undef TARGET_INIT_BUILTINS
|
|
#define TARGET_INIT_BUILTINS mips_init_builtins
|
|
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
|
|
index 87d60078bb0..8f046de424c 100644
|
|
--- a/gcc/config/rs6000/rs6000.c
|
|
+++ b/gcc/config/rs6000/rs6000.c
|
|
@@ -15457,7 +15457,7 @@ static tree
|
|
fold_build_vec_cmp (tree_code code, tree type,
|
|
tree arg0, tree arg1)
|
|
{
|
|
- tree cmp_type = build_same_sized_truth_vector_type (type);
|
|
+ tree cmp_type = truth_type_for (type);
|
|
tree zero_vec = build_zero_cst (type);
|
|
tree minus_one_vec = build_minus_one_cst (type);
|
|
tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
|
|
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
|
|
index db3f94978ec..c35666dec83 100644
|
|
--- a/gcc/config/s390/s390.c
|
|
+++ b/gcc/config/s390/s390.c
|
|
@@ -6588,7 +6588,7 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
|
|
case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
|
|
default: gcc_unreachable ();
|
|
}
|
|
- scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
|
|
+ scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
|
|
|
|
if (inv_p)
|
|
all_p = !all_p;
|
|
@@ -6694,7 +6694,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
|
|
|
|
/* We always use an integral type vector to hold the comparison
|
|
result. */
|
|
- result_mode = mode_for_int_vector (cmp_mode).require ();
|
|
+ result_mode = related_int_vector_mode (cmp_mode).require ();
|
|
result_target = gen_reg_rtx (result_mode);
|
|
|
|
/* We allow vector immediates as comparison operands that
|
|
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
|
|
index f365a5a7f7b..23a54f3c332 100644
|
|
--- a/gcc/cp/call.c
|
|
+++ b/gcc/cp/call.c
|
|
@@ -5161,7 +5161,7 @@ build_conditional_expr_1 (const op_location_t &loc,
|
|
|
|
if (!COMPARISON_CLASS_P (arg1))
|
|
{
|
|
- tree cmp_type = build_same_sized_truth_vector_type (arg1_type);
|
|
+ tree cmp_type = truth_type_for (arg1_type);
|
|
arg1 = build2 (NE_EXPR, cmp_type, arg1, build_zero_cst (arg1_type));
|
|
}
|
|
return build3_loc (loc, VEC_COND_EXPR, arg2_type, arg1, arg2, arg3);
|
|
diff --git a/gcc/cp/class.c b/gcc/cp/class.c
|
|
index 6b57184e081..5b0a60d61cc 100644
|
|
--- a/gcc/cp/class.c
|
|
+++ b/gcc/cp/class.c
|
|
@@ -4760,8 +4760,6 @@ adjust_clone_args (tree decl)
|
|
tree orig_decl_parms = TYPE_ARG_TYPES (TREE_TYPE (decl));
|
|
tree decl_parms, clone_parms;
|
|
|
|
- clone_parms = orig_clone_parms;
|
|
-
|
|
/* Skip the 'this' parameter. */
|
|
orig_clone_parms = TREE_CHAIN (orig_clone_parms);
|
|
orig_decl_parms = TREE_CHAIN (orig_decl_parms);
|
|
@@ -8581,7 +8579,6 @@ dump_class_hierarchy_r (FILE *stream,
|
|
tree base_binfo;
|
|
int i;
|
|
|
|
- indented = maybe_indent_hierarchy (stream, indent, 0);
|
|
fprintf (stream, "%s (0x" HOST_WIDE_INT_PRINT_HEX ") ",
|
|
type_as_string (BINFO_TYPE (binfo), TFF_PLAIN_IDENTIFIER),
|
|
(HOST_WIDE_INT) (uintptr_t) binfo);
|
|
@@ -8602,7 +8599,6 @@ dump_class_hierarchy_r (FILE *stream,
|
|
fprintf (stream, " virtual");
|
|
fprintf (stream, "\n");
|
|
|
|
- indented = 0;
|
|
if (BINFO_PRIMARY_P (binfo))
|
|
{
|
|
indented = maybe_indent_hierarchy (stream, indent + 3, indented);
|
|
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
|
|
index 39d55589ef3..5c82c2272c2 100644
|
|
--- a/gcc/cp/decl.c
|
|
+++ b/gcc/cp/decl.c
|
|
@@ -6387,7 +6387,7 @@ build_aggr_init_full_exprs (tree decl, tree init, int flags)
|
|
static tree
|
|
check_initializer (tree decl, tree init, int flags, vec<tree, va_gc> **cleanups)
|
|
{
|
|
- tree type = TREE_TYPE (decl);
|
|
+ tree type;
|
|
tree init_code = NULL;
|
|
tree core_type;
|
|
|
|
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
|
|
index e1c02d7b718..60fe58e0313 100644
|
|
--- a/gcc/cp/parser.c
|
|
+++ b/gcc/cp/parser.c
|
|
@@ -10485,7 +10485,7 @@ cp_parser_lambda_expression (cp_parser* parser)
|
|
if (ok)
|
|
maybe_add_lambda_conv_op (type);
|
|
|
|
- type = finish_struct (type, /*attributes=*/NULL_TREE);
|
|
+ finish_struct (type, /*attributes=*/NULL_TREE);
|
|
|
|
in_discarded_stmt = discarded;
|
|
|
|
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
|
|
index 4787747b6ff..ff7921533cb 100644
|
|
--- a/gcc/cp/pt.c
|
|
+++ b/gcc/cp/pt.c
|
|
@@ -7459,8 +7459,7 @@ unify_bound_ttp_args (tree tparms, tree targs, tree parm, tree& arg,
|
|
{
|
|
/* In keeping with P0522R0, adjust P's template arguments
|
|
to apply to A's template; then flatten it again. */
|
|
- tree nparmvec = parmvec;
|
|
- nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none);
|
|
+ tree nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none);
|
|
nparmvec = expand_template_argument_pack (nparmvec);
|
|
|
|
if (unify (tparms, targs, nparmvec, argvec,
|
|
@@ -7887,7 +7886,6 @@ convert_template_argument (tree parm,
|
|
invalid, but static members are OK. In any
|
|
case, grab the underlying fields/functions
|
|
and issue an error later if required. */
|
|
- orig_arg = TREE_VALUE (arg);
|
|
TREE_TYPE (arg) = unknown_type_node;
|
|
}
|
|
|
|
diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c
|
|
index 3ca2b5e7b88..9aea6b939ec 100644
|
|
--- a/gcc/cp/rtti.c
|
|
+++ b/gcc/cp/rtti.c
|
|
@@ -209,8 +209,8 @@ build_headof (tree exp)
|
|
offset = build_vtbl_ref (cp_build_fold_indirect_ref (exp),
|
|
index);
|
|
|
|
- type = cp_build_qualified_type (ptr_type_node,
|
|
- cp_type_quals (TREE_TYPE (exp)));
|
|
+ cp_build_qualified_type (ptr_type_node,
|
|
+ cp_type_quals (TREE_TYPE (exp)));
|
|
return fold_build_pointer_plus (exp, offset);
|
|
}
|
|
|
|
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
|
|
index 2169f8c4efd..c42fd731cd2 100644
|
|
--- a/gcc/cp/typeck.c
|
|
+++ b/gcc/cp/typeck.c
|
|
@@ -4293,7 +4293,7 @@ build_vec_cmp (tree_code code, tree type,
|
|
{
|
|
tree zero_vec = build_zero_cst (type);
|
|
tree minus_one_vec = build_minus_one_cst (type);
|
|
- tree cmp_type = build_same_sized_truth_vector_type(type);
|
|
+ tree cmp_type = truth_type_for (type);
|
|
tree cmp = build2 (code, cmp_type, arg0, arg1);
|
|
return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
|
|
}
|
|
@@ -9189,8 +9189,6 @@ convert_for_initialization (tree exp, tree type, tree rhs, int flags,
|
|
if (exp == error_mark_node)
|
|
return error_mark_node;
|
|
|
|
- rhstype = non_reference (rhstype);
|
|
-
|
|
type = complete_type (type);
|
|
|
|
if (DIRECT_INIT_EXPR_P (type, rhs))
|
|
diff --git a/gcc/cselib.c b/gcc/cselib.c
|
|
index 84c17c23f6d..108b2588cf9 100644
|
|
--- a/gcc/cselib.c
|
|
+++ b/gcc/cselib.c
|
|
@@ -2518,13 +2518,12 @@ cselib_record_sets (rtx_insn *insn)
|
|
int n_sets = 0;
|
|
int i;
|
|
struct cselib_set sets[MAX_SETS];
|
|
- rtx body = PATTERN (insn);
|
|
rtx cond = 0;
|
|
int n_sets_before_autoinc;
|
|
int n_strict_low_parts = 0;
|
|
struct cselib_record_autoinc_data data;
|
|
|
|
- body = PATTERN (insn);
|
|
+ rtx body = PATTERN (insn);
|
|
if (GET_CODE (body) == COND_EXEC)
|
|
{
|
|
cond = COND_EXEC_TEST (body);
|
|
diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
|
|
index 2abff92fc88..6f5499b08ee 100644
|
|
--- a/gcc/d/d-codegen.cc
|
|
+++ b/gcc/d/d-codegen.cc
|
|
@@ -1397,7 +1397,7 @@ build_boolop (tree_code code, tree arg0, tree arg1)
|
|
/* Build a vector comparison.
|
|
VEC_COND_EXPR <e1 op e2, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>; */
|
|
tree type = TREE_TYPE (arg0);
|
|
- tree cmptype = build_same_sized_truth_vector_type (type);
|
|
+ tree cmptype = truth_type_for (type);
|
|
tree cmp = fold_build2_loc (input_location, code, cmptype, arg0, arg1);
|
|
|
|
return fold_build3_loc (input_location, VEC_COND_EXPR, type, cmp,
|
|
diff --git a/gcc/df-scan.c b/gcc/df-scan.c
|
|
index 08d7af33371..84c2e54c855 100644
|
|
--- a/gcc/df-scan.c
|
|
+++ b/gcc/df-scan.c
|
|
@@ -229,7 +229,6 @@ void
|
|
df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED)
|
|
{
|
|
struct df_scan_problem_data *problem_data;
|
|
- unsigned int insn_num = get_max_uid () + 1;
|
|
basic_block bb;
|
|
|
|
/* Given the number of pools, this is really faster than tearing
|
|
@@ -257,7 +256,6 @@ df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED)
|
|
bitmap_obstack_initialize (&problem_data->reg_bitmaps);
|
|
bitmap_obstack_initialize (&problem_data->insn_bitmaps);
|
|
|
|
- insn_num += insn_num / 4;
|
|
df_grow_reg_info ();
|
|
|
|
df_grow_insn_info ();
|
|
diff --git a/gcc/doc/poly-int.texi b/gcc/doc/poly-int.texi
|
|
index 1023e823cb3..d60bb02aabf 100644
|
|
--- a/gcc/doc/poly-int.texi
|
|
+++ b/gcc/doc/poly-int.texi
|
|
@@ -803,6 +803,18 @@ the assertion is known to hold.
|
|
@item constant_lower_bound (@var{a})
|
|
Assert that @var{a} is nonnegative and return the smallest value it can have.
|
|
|
|
+@item constant_lower_bound_with_limit (@var{a}, @var{b})
|
|
+Return the least value @var{a} can have, given that the context in
|
|
+which @var{a} appears guarantees that the answer is no less than @var{b}.
|
|
+In other words, the caller is asserting that @var{a} is greater than or
|
|
+equal to @var{b} even if @samp{known_ge (@var{a}, @var{b})} doesn't hold.
|
|
+
|
|
+@item constant_upper_bound_with_limit (@var{a}, @var{b})
|
|
+Return the greatest value @var{a} can have, given that the context in
|
|
+which @var{a} appears guarantees that the answer is no greater than @var{b}.
|
|
+In other words, the caller is asserting that @var{a} is less than or equal
|
|
+to @var{b} even if @samp{known_le (@var{a}, @var{b})} doesn't hold.
|
|
+
|
|
@item lower_bound (@var{a}, @var{b})
|
|
Return a value that is always less than or equal to both @var{a} and @var{b}.
|
|
It will be the greatest such value for some indeterminate values
|
|
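As an informal illustration of the two bound functions documented above (a sketch only; poly_int64 and HOST_WIDE_INT are GCC-internal types, and the two-coefficient constructor assumes a target with one runtime indeterminate, such as SVE):

  /* Let A be the poly_int 4 + 4x, where x is a runtime indeterminate.  */
  poly_int64 a (4, 4);

  /* known_ge (a, 8) does not hold (x may be zero), so the plain lower
     bound is just the constant coefficient.  */
  HOST_WIDE_INT lb = constant_lower_bound (a);                 /* 4 */

  /* If the caller knows from context that A >= 8 anyway, it can assert
     that and obtain the tighter bound.  */
  HOST_WIDE_INT lb2 = constant_lower_bound_with_limit (a, 8);  /* 8 */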
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
|
index 8c8978bb13a..73db70867b4 100644
|
|
--- a/gcc/doc/tm.texi
|
|
+++ b/gcc/doc/tm.texi
|
|
@@ -6016,27 +6016,71 @@ against lower halves of vectors recursively until the specified mode is
|
|
reached. The default is @var{mode} which means no splitting.
|
|
@end deftypefn
|
|
|
|
-@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
|
|
-If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
|
|
-the only one that is worth considering, this hook should add all suitable
|
|
-vector sizes to @var{sizes}, in order of decreasing preference. The first
|
|
-one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
|
|
+@deftypefn {Target Hook} {unsigned int} TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES (vector_modes *@var{modes}, bool @var{all})
|
|
+If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}
|
|
+is not the only approach worth considering, this hook should add one mode to
|
|
+@var{modes} for each useful alternative approach. These modes are then
|
|
+passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode
|
|
+for a given element mode.
|
|
+
|
|
+The modes returned in @var{modes} should use the smallest element mode
|
|
+possible for the vectorization approach that they represent, preferring
|
|
+integer modes over floating-point modes in the event of a tie. The first
|
|
+mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its
|
|
+element mode.
|
|
+
|
|
+If @var{all} is true, add suitable vector modes even when they are generally
|
|
+not expected to be worthwhile.
|
|
+
|
|
+The hook returns a bitmask of flags that control how the modes in
|
|
+@var{modes} are used. The flags are:
|
|
+@table @code
|
|
+@item VECT_COMPARE_COSTS
|
|
+Tells the loop vectorizer to try all the provided modes and pick the one
|
|
+with the lowest cost. By default the vectorizer will choose the first
|
|
+mode that works.
|
|
+@end table
|
|
|
|
The hook does not need to do anything if the vector returned by
|
|
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
|
|
-for autovectorization. The default implementation does nothing.
|
|
-@end deftypefn
|
|
-
|
|
-@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (poly_uint64 @var{nunits}, poly_uint64 @var{length})
|
|
-A vector mask is a value that holds one boolean result for every element
|
|
-in a vector. This hook returns the machine mode that should be used to
|
|
-represent such a mask when the vector in question is @var{length} bytes
|
|
-long and contains @var{nunits} elements. The hook returns an empty
|
|
-@code{opt_machine_mode} if no such mode exists.
|
|
-
|
|
-The default implementation returns the mode of an integer vector that
|
|
-is @var{length} bytes long and that contains @var{nunits} elements,
|
|
-if such a mode exists.
|
|
+for autovectorization. The default implementation adds no modes and
|
|
+returns 0.
|
|
+@end deftypefn
|
|
+
|
|
+@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_RELATED_MODE (machine_mode @var{vector_mode}, scalar_mode @var{element_mode}, poly_uint64 @var{nunits})
|
|
+If a piece of code is using vector mode @var{vector_mode} and also wants
|
|
+to operate on elements of mode @var{element_mode}, return the vector mode
|
|
+it should use for those elements. If @var{nunits} is nonzero, ensure that
|
|
+the mode has exactly @var{nunits} elements, otherwise pick whichever vector
|
|
+size pairs the most naturally with @var{vector_mode}. Return an empty
|
|
+@code{opt_machine_mode} if there is no supported vector mode with the
|
|
+required properties.
|
|
+
|
|
+There is no prescribed way of handling the case in which @var{nunits}
|
|
+is zero. One common choice is to pick a vector mode with the same size
|
|
+as @var{vector_mode}; this is the natural choice if the target has a
|
|
+fixed vector size. Another option is to choose a vector mode with the
|
|
+same number of elements as @var{vector_mode}; this is the natural choice
|
|
+if the target has a fixed number of elements. Alternatively, the hook
|
|
+might choose a middle ground, such as trying to keep the number of
|
|
+elements as similar as possible while applying maximum and minimum
|
|
+vector sizes.
|
|
+
|
|
+The default implementation uses @code{mode_for_vector} to find the
|
|
+requested mode, returning a mode with the same size as @var{vector_mode}
|
|
+when @var{nunits} is zero. This is the correct behavior for most targets.
|
|
+@end deftypefn
|
|
+
|
|
+@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (machine_mode @var{mode})
|
|
+Return the mode to use for a vector mask that holds one boolean
|
|
+result for each element of vector mode @var{mode}. The returned mask mode
|
|
+can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of
|
|
+booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class
|
|
+@code{MODE_INT}). Return an empty @code{opt_machine_mode} if no such
|
|
+mask mode exists.
|
|
+
|
|
+The default implementation returns a @code{MODE_VECTOR_INT} with the
|
|
+same size and number of elements as @var{mode}, if such a mode exists.
|
|
@end deftypefn
|
|
|
|
@deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn})
|
|
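To make the interplay of the hooks documented above concrete, here is a deliberately simplified sketch of what a fixed-width target with 16-byte and 8-byte vectors might install. The function names and the specific modes (V16QImode, V8QImode) are illustrative only and are not taken from any in-tree port; the hook signatures are the ones introduced by this patch:

  static unsigned int
  example_autovectorize_vector_modes (vector_modes *modes, bool all)
  {
    /* One entry per vectorization approach, each using its smallest
       element mode; the first entry matches the preferred SIMD mode.  */
    modes->safe_push (V16QImode);
    if (all)
      modes->safe_push (V8QImode);
    /* Ask the loop vectorizer to cost both approaches instead of taking
       the first one that works.  */
    return VECT_COMPARE_COSTS;
  }

  static opt_machine_mode
  example_vectorize_related_mode (machine_mode vector_mode,
                                  scalar_mode element_mode,
                                  poly_uint64 nunits)
  {
    /* A fixed-size target can simply keep the total vector size, which
       is what the default implementation already does.  */
    return default_vectorize_related_mode (vector_mode, element_mode, nunits);
  }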
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
|
index fe1194ef91a..bc362dca0f5 100644
|
|
--- a/gcc/doc/tm.texi.in
|
|
+++ b/gcc/doc/tm.texi.in
|
|
@@ -4172,7 +4172,9 @@ address; but often a machine-dependent strategy can generate better code.
|
|
|
|
@hook TARGET_VECTORIZE_SPLIT_REDUCTION
|
|
|
|
-@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
|
|
+@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
|
|
+
|
|
+@hook TARGET_VECTORIZE_RELATED_MODE
|
|
|
|
@hook TARGET_VECTORIZE_GET_MASK_MODE
|
|
|
|
diff --git a/gcc/dojump.c b/gcc/dojump.c
|
|
index 8626689463e..bac37a357a9 100644
|
|
--- a/gcc/dojump.c
|
|
+++ b/gcc/dojump.c
|
|
@@ -668,8 +668,6 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
|
|
code = LE;
|
|
if_true_label = if_false_label;
|
|
if_false_label = drop_through_label;
|
|
- drop_through_if_true = false;
|
|
- drop_through_if_false = true;
|
|
prob = prob.invert ();
|
|
}
|
|
|
|
diff --git a/gcc/early-remat.c b/gcc/early-remat.c
|
|
index 122891c1edb..0396f16babf 100644
|
|
--- a/gcc/early-remat.c
|
|
+++ b/gcc/early-remat.c
|
|
@@ -1123,7 +1123,6 @@ early_remat::record_equiv_candidates (unsigned int cand1_index,
|
|
ec->representative = cand1_index;
|
|
cand1->equiv_class = ec;
|
|
}
|
|
- cand1 = &m_candidates[ec->representative];
|
|
cand2->equiv_class = ec;
|
|
bitmap_set_bit (ec->members, cand2_index);
|
|
if (cand2_index > ec->representative)
|
|
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
|
|
index 15dffa58a2e..78104603c76 100644
|
|
--- a/gcc/emit-rtl.c
|
|
+++ b/gcc/emit-rtl.c
|
|
@@ -3993,7 +3993,7 @@ try_split (rtx pat, rtx_insn *trial, int last)
|
|
before = PREV_INSN (trial);
|
|
after = NEXT_INSN (trial);
|
|
|
|
- tem = emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial));
|
|
+ emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial));
|
|
|
|
delete_insn (trial);
|
|
|
|
diff --git a/gcc/expmed.c b/gcc/expmed.c
|
|
index c5f5499c013..34cdfbf151a 100644
|
|
--- a/gcc/expmed.c
|
|
+++ b/gcc/expmed.c
|
|
@@ -1662,12 +1662,10 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
|
|
poly_uint64 nunits;
|
|
if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
|
|
GET_MODE_UNIT_BITSIZE (tmode), &nunits)
|
|
- || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
|
|
- || !VECTOR_MODE_P (new_mode)
|
|
+ || !related_vector_mode (tmode, inner_mode,
|
|
+ nunits).exists (&new_mode)
|
|
|| maybe_ne (GET_MODE_SIZE (new_mode),
|
|
- GET_MODE_SIZE (GET_MODE (op0)))
|
|
- || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
|
|
- || !targetm.vector_mode_supported_p (new_mode))
|
|
+ GET_MODE_SIZE (GET_MODE (op0))))
|
|
new_mode = VOIDmode;
|
|
}
|
|
poly_uint64 pos;
|
|
diff --git a/gcc/expr.c b/gcc/expr.c
|
|
index fa15b7eceae..5e3700fe15f 100644
|
|
--- a/gcc/expr.c
|
|
+++ b/gcc/expr.c
|
|
@@ -249,6 +249,31 @@ convert_move (rtx to, rtx from, int unsignedp)
|
|
|
|
if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
|
|
{
|
|
+ if (GET_MODE_UNIT_PRECISION (to_mode)
|
|
+ > GET_MODE_UNIT_PRECISION (from_mode))
|
|
+ {
|
|
+ optab op = unsignedp ? zext_optab : sext_optab;
|
|
+ insn_code icode = convert_optab_handler (op, to_mode, from_mode);
|
|
+ if (icode != CODE_FOR_nothing)
|
|
+ {
|
|
+ emit_unop_insn (icode, to, from,
|
|
+ unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (GET_MODE_UNIT_PRECISION (to_mode)
|
|
+ < GET_MODE_UNIT_PRECISION (from_mode))
|
|
+ {
|
|
+ insn_code icode = convert_optab_handler (trunc_optab,
|
|
+ to_mode, from_mode);
|
|
+ if (icode != CODE_FOR_nothing)
|
|
+ {
|
|
+ emit_unop_insn (icode, to, from, TRUNCATE);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
|
|
GET_MODE_BITSIZE (to_mode)));
|
|
|
|
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
|
|
index 049fee91876..e2e66246315 100644
|
|
--- a/gcc/fold-const.h
|
|
+++ b/gcc/fold-const.h
|
|
@@ -83,7 +83,7 @@ extern bool fold_deferring_overflow_warnings_p (void);
|
|
extern void fold_overflow_warning (const char*, enum warn_strict_overflow_code);
|
|
extern enum tree_code fold_div_compare (enum tree_code, tree, tree,
|
|
tree *, tree *, bool *);
|
|
-extern int operand_equal_p (const_tree, const_tree, unsigned int);
|
|
+extern int operand_equal_p (const_tree, const_tree, unsigned int flags = 0);
|
|
extern int multiple_of_p (tree, const_tree, const_tree);
|
|
#define omit_one_operand(T1,T2,T3)\
|
|
omit_one_operand_loc (UNKNOWN_LOCATION, T1, T2, T3)
|
|
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
|
|
index cf2c9de2d35..f2966fadae8 100644
|
|
--- a/gcc/fwprop.c
|
|
+++ b/gcc/fwprop.c
|
|
@@ -448,6 +448,18 @@ enum {
|
|
PR_OPTIMIZE_FOR_SPEED = 4
|
|
};
|
|
|
|
+/* Check that X has a single def. */
|
|
+
|
|
+static bool
|
|
+reg_single_def_p (rtx x)
|
|
+{
|
|
+ if (!REG_P (x))
|
|
+ return false;
|
|
+
|
|
+ int regno = REGNO (x);
|
|
+ return (DF_REG_DEF_COUNT (regno) == 1
|
|
+ && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), regno));
|
|
+}
|
|
|
|
/* Replace all occurrences of OLD in *PX with NEW and try to simplify the
|
|
resulting expression. Replace *PX with a new RTL expression if an
|
|
@@ -547,6 +559,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
|
|
tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
|
|
SUBREG_BYTE (x));
|
|
}
|
|
+
|
|
+ else
|
|
+ {
|
|
+ rtvec vec;
|
|
+ rtvec newvec;
|
|
+ const char *fmt = GET_RTX_FORMAT (code);
|
|
+ rtx op;
|
|
+
|
|
+ for (int i = 0; fmt[i]; i++)
|
|
+ switch (fmt[i])
|
|
+ {
|
|
+ case 'E':
|
|
+ vec = XVEC (x, i);
|
|
+ newvec = vec;
|
|
+ for (int j = 0; j < GET_NUM_ELEM (vec); j++)
|
|
+ {
|
|
+ op = RTVEC_ELT (vec, j);
|
|
+ valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
|
|
+ if (op != RTVEC_ELT (vec, j))
|
|
+ {
|
|
+ if (newvec == vec)
|
|
+ {
|
|
+ newvec = shallow_copy_rtvec (vec);
|
|
+ if (!tem)
|
|
+ tem = shallow_copy_rtx (x);
|
|
+ XVEC (tem, i) = newvec;
|
|
+ }
|
|
+ RTVEC_ELT (newvec, j) = op;
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case 'e':
|
|
+ if (XEXP (x, i))
|
|
+ {
|
|
+ op = XEXP (x, i);
|
|
+ valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
|
|
+ if (op != XEXP (x, i))
|
|
+ {
|
|
+ if (!tem)
|
|
+ tem = shallow_copy_rtx (x);
|
|
+ XEXP (tem, i) = op;
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
break;
|
|
|
|
case RTX_OBJ:
|
|
@@ -1370,10 +1430,11 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
|
|
|
|
/* Given a use USE of an insn, if it has a single reaching
|
|
definition, try to forward propagate it into that insn.
|
|
- Return true if cfg cleanup will be needed. */
|
|
+ Return true if cfg cleanup will be needed.
|
|
+ REG_PROP_ONLY is true if we should only propagate register copies. */
|
|
|
|
static bool
|
|
-forward_propagate_into (df_ref use)
|
|
+forward_propagate_into (df_ref use, bool reg_prop_only = false)
|
|
{
|
|
df_ref def;
|
|
rtx_insn *def_insn, *use_insn;
|
|
@@ -1394,10 +1455,6 @@ forward_propagate_into (df_ref use)
|
|
if (DF_REF_IS_ARTIFICIAL (def))
|
|
return false;
|
|
|
|
- /* Do not propagate loop invariant definitions inside the loop. */
|
|
- if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
|
|
- return false;
|
|
-
|
|
/* Check if the use is still present in the insn! */
|
|
use_insn = DF_REF_INSN (use);
|
|
if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
|
|
@@ -1415,6 +1472,19 @@ forward_propagate_into (df_ref use)
|
|
if (!def_set)
|
|
return false;
|
|
|
|
+ if (reg_prop_only
|
|
+ && (!reg_single_def_p (SET_SRC (def_set))
|
|
+ || !reg_single_def_p (SET_DEST (def_set))))
|
|
+ return false;
|
|
+
|
|
+ /* Allow propagations into a loop only for reg-to-reg copies, since
|
|
+ replacing one register by another shouldn't increase the cost. */
|
|
+
|
|
+ if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father
|
|
+ && (!reg_single_def_p (SET_SRC (def_set))
|
|
+ || !reg_single_def_p (SET_DEST (def_set))))
|
|
+ return false;
|
|
+
|
|
/* Only try one kind of propagation. If two are possible, we'll
|
|
do it on the following iterations. */
|
|
if (forward_propagate_and_simplify (use, def_insn, def_set)
|
|
@@ -1483,7 +1553,7 @@ gate_fwprop (void)
|
|
}
|
|
|
|
static unsigned int
|
|
-fwprop (void)
|
|
+fwprop (bool fwprop_addr_p)
|
|
{
|
|
unsigned i;
|
|
|
|
@@ -1502,11 +1572,16 @@ fwprop (void)
|
|
|
|
df_ref use = DF_USES_GET (i);
|
|
if (use)
|
|
- if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
|
- || DF_REF_BB (use)->loop_father == NULL
|
|
- /* The outer most loop is not really a loop. */
|
|
- || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
|
|
- forward_propagate_into (use);
|
|
+ {
|
|
+ if (DF_REF_TYPE (use) == DF_REF_REG_USE
|
|
+ || DF_REF_BB (use)->loop_father == NULL
|
|
+ /* The outer most loop is not really a loop. */
|
|
+ || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
|
|
+ forward_propagate_into (use, fwprop_addr_p);
|
|
+
|
|
+ else if (fwprop_addr_p)
|
|
+ forward_propagate_into (use, false);
|
|
+ }
|
|
}
|
|
|
|
fwprop_done ();
|
|
@@ -1537,7 +1612,7 @@ public:
|
|
|
|
/* opt_pass methods: */
|
|
virtual bool gate (function *) { return gate_fwprop (); }
|
|
- virtual unsigned int execute (function *) { return fwprop (); }
|
|
+ virtual unsigned int execute (function *) { return fwprop (false); }
|
|
|
|
}; // class pass_rtl_fwprop
|
|
|
|
@@ -1549,33 +1624,6 @@ make_pass_rtl_fwprop (gcc::context *ctxt)
|
|
return new pass_rtl_fwprop (ctxt);
|
|
}
|
|
|
|
-static unsigned int
|
|
-fwprop_addr (void)
|
|
-{
|
|
- unsigned i;
|
|
-
|
|
- fwprop_init ();
|
|
-
|
|
- /* Go through all the uses. df_uses_create will create new ones at the
|
|
- end, and we'll go through them as well. */
|
|
- for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
|
|
- {
|
|
- if (!propagations_left)
|
|
- break;
|
|
-
|
|
- df_ref use = DF_USES_GET (i);
|
|
- if (use)
|
|
- if (DF_REF_TYPE (use) != DF_REF_REG_USE
|
|
- && DF_REF_BB (use)->loop_father != NULL
|
|
- /* The outer most loop is not really a loop. */
|
|
- && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
|
|
- forward_propagate_into (use);
|
|
- }
|
|
-
|
|
- fwprop_done ();
|
|
- return 0;
|
|
-}
|
|
-
|
|
namespace {
|
|
|
|
const pass_data pass_data_rtl_fwprop_addr =
|
|
@@ -1600,7 +1648,7 @@ public:
|
|
|
|
/* opt_pass methods: */
|
|
virtual bool gate (function *) { return gate_fwprop (); }
|
|
- virtual unsigned int execute (function *) { return fwprop_addr (); }
|
|
+ virtual unsigned int execute (function *) { return fwprop (true); }
|
|
|
|
}; // class pass_rtl_fwprop_addr
|
|
|
|
diff --git a/gcc/gimple.c b/gcc/gimple.c
|
|
index 8fae60fb848..bf362dbe545 100644
|
|
--- a/gcc/gimple.c
|
|
+++ b/gcc/gimple.c
|
|
@@ -1771,6 +1771,8 @@ gimple_get_lhs (const gimple *stmt)
|
|
return gimple_assign_lhs (stmt);
|
|
else if (code == GIMPLE_CALL)
|
|
return gimple_call_lhs (stmt);
|
|
+ else if (code == GIMPLE_PHI)
|
|
+ return gimple_phi_result (stmt);
|
|
else
|
|
return NULL_TREE;
|
|
}
|
|
diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
|
|
index 4534d43721f..489d0b93b42 100644
|
|
--- a/gcc/graphite-scop-detection.c
|
|
+++ b/gcc/graphite-scop-detection.c
|
|
@@ -1105,14 +1105,12 @@ assign_parameter_index_in_region (tree name, sese_info_p region)
|
|
gcc_assert (TREE_CODE (name) == SSA_NAME
|
|
&& INTEGRAL_TYPE_P (TREE_TYPE (name))
|
|
&& ! defined_in_sese_p (name, region->region));
|
|
-
|
|
int i;
|
|
tree p;
|
|
FOR_EACH_VEC_ELT (region->params, i, p)
|
|
if (p == name)
|
|
return;
|
|
|
|
- i = region->params.length ();
|
|
region->params.safe_push (name);
|
|
}
|
|
|
|
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
|
|
index 95788dfee7d..21ecd566766 100644
|
|
--- a/gcc/internal-fn.c
|
|
+++ b/gcc/internal-fn.c
|
|
@@ -100,7 +100,7 @@ init_internal_fns ()
|
|
/* Create static initializers for the information returned by
|
|
direct_internal_fn. */
|
|
#define not_direct { -2, -2, false }
|
|
-#define mask_load_direct { -1, 2, false }
|
|
+#define mask_load_direct { -1, 2, true }
|
|
#define load_lanes_direct { -1, -1, false }
|
|
#define mask_load_lanes_direct { -1, -1, false }
|
|
#define gather_load_direct { -1, -1, false }
|
|
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
|
|
index aa91b56c81f..8a90ae1b4e6 100644
|
|
--- a/gcc/ira-color.c
|
|
+++ b/gcc/ira-color.c
|
|
@@ -1108,7 +1108,6 @@ setup_profitable_hard_regs (void)
|
|
|| empty_profitable_hard_regs (a))
|
|
continue;
|
|
data = ALLOCNO_COLOR_DATA (a);
|
|
- mode = ALLOCNO_MODE (a);
|
|
if ((costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a)) != NULL
|
|
|| (costs = ALLOCNO_HARD_REG_COSTS (a)) != NULL)
|
|
{
|
|
diff --git a/gcc/ira.c b/gcc/ira.c
|
|
index b330f2a287b..4262e5cf3b7 100644
|
|
--- a/gcc/ira.c
|
|
+++ b/gcc/ira.c
|
|
@@ -4414,10 +4414,9 @@ rtx_moveable_p (rtx *loc, enum op_type type)
|
|
{
|
|
const char *fmt;
|
|
rtx x = *loc;
|
|
- enum rtx_code code = GET_CODE (x);
|
|
int i, j;
|
|
|
|
- code = GET_CODE (x);
|
|
+ enum rtx_code code = GET_CODE (x);
|
|
switch (code)
|
|
{
|
|
case CONST:
|
|
diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c
|
|
index ee9fd51f129..7a345a52ae1 100644
|
|
--- a/gcc/lra-eliminations.c
|
|
+++ b/gcc/lra-eliminations.c
|
|
@@ -1146,7 +1146,6 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p,
|
|
single_set without having put new body into the insn and the
|
|
re-recognition won't hurt in this rare case. */
|
|
id = lra_update_insn_recog_data (insn);
|
|
- static_id = id->insn_static_data;
|
|
}
|
|
|
|
/* Spill pseudos which are assigned to hard registers in SET. Add
|
|
diff --git a/gcc/lra.c b/gcc/lra.c
|
|
index 1d2578f8c12..10b85340fc5 100644
|
|
--- a/gcc/lra.c
|
|
+++ b/gcc/lra.c
|
|
@@ -1029,12 +1029,8 @@ lra_set_insn_recog_data (rtx_insn *insn)
|
|
data->operand_loc,
|
|
constraints, operand_mode, NULL);
|
|
if (nop > 0)
|
|
- {
|
|
- const char *p = recog_data.constraints[0];
|
|
-
|
|
- for (p = constraints[0]; *p; p++)
|
|
- nalt += *p == ',';
|
|
- }
|
|
+ for (const char *p =constraints[0]; *p; p++)
|
|
+ nalt += *p == ',';
|
|
data->insn_static_data = insn_static_data
|
|
= get_static_insn_data (-1, nop, 0, nalt);
|
|
for (i = 0; i < nop; i++)
|
|
diff --git a/gcc/machmode.h b/gcc/machmode.h
|
|
index 3a7cee88962..d564f9c6458 100644
|
|
--- a/gcc/machmode.h
|
|
+++ b/gcc/machmode.h
|
|
@@ -257,6 +257,9 @@ public:
|
|
bool exists () const;
|
|
template<typename U> bool exists (U *) const;
|
|
|
|
+ bool operator== (const T &m) const { return m_mode == m; }
|
|
+ bool operator!= (const T &m) const { return m_mode != m; }
|
|
+
|
|
private:
|
|
machine_mode m_mode;
|
|
};
|
|
@@ -841,20 +844,9 @@ smallest_int_mode_for_size (poly_uint64 size)
|
|
extern opt_scalar_int_mode int_mode_for_mode (machine_mode);
|
|
extern opt_machine_mode bitwise_mode_for_mode (machine_mode);
|
|
extern opt_machine_mode mode_for_vector (scalar_mode, poly_uint64);
|
|
-extern opt_machine_mode mode_for_int_vector (unsigned int, poly_uint64);
|
|
-
|
|
-/* Return the integer vector equivalent of MODE, if one exists. In other
|
|
- words, return the mode for an integer vector that has the same number
|
|
- of bits as MODE and the same number of elements as MODE, with the
|
|
- latter being 1 if MODE is scalar. The returned mode can be either
|
|
- an integer mode or a vector mode. */
|
|
-
|
|
-inline opt_machine_mode
|
|
-mode_for_int_vector (machine_mode mode)
|
|
-{
|
|
- return mode_for_int_vector (GET_MODE_UNIT_BITSIZE (mode),
|
|
- GET_MODE_NUNITS (mode));
|
|
-}
|
|
+extern opt_machine_mode related_vector_mode (machine_mode, scalar_mode,
|
|
+ poly_uint64 = 0);
|
|
+extern opt_machine_mode related_int_vector_mode (machine_mode);
|
|
|
|
/* A class for iterating through possible bitfield modes. */
|
|
class bit_field_mode_iterator
|
|
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
|
|
index 74159734fc8..0d7f104a2f2 100644
|
|
--- a/gcc/omp-expand.c
|
|
+++ b/gcc/omp-expand.c
|
|
@@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
|
|
&& loop->safelen > 1)
|
|
{
|
|
loop->force_vectorize = true;
|
|
+ if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
|
|
+ {
|
|
+ unsigned HOST_WIDE_INT v
|
|
+ = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
|
|
+ if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
|
|
+ loop->simdlen = v;
|
|
+ }
|
|
cfun->has_force_vectorize_loops = true;
|
|
}
|
|
else if (dont_vectorize)
|
|
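At the source level, the omp-expand.c change above means that a simdlen clause which does not exceed the loop's safelen is now recorded as the loop's preferred vectorization factor. A minimal sketch (function and array names are made up; the clauses themselves are standard OpenMP):

  void
  scale (float *restrict dst, const float *restrict src, int n)
  {
    /* With the hunk above, simdlen(8) is stored in loop->simdlen
       because 8 <= safelen(16).  */
  #pragma omp simd simdlen(8) safelen(16)
    for (int i = 0; i < n; i++)
      dst[i] = src[i] * 2.0f;
  }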
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
|
|
index 356772ff458..4fb53af7587 100644
|
|
--- a/gcc/omp-general.c
|
|
+++ b/gcc/omp-general.c
|
|
@@ -468,13 +468,16 @@ omp_max_vf (void)
|
|
&& global_options_set.x_flag_tree_loop_vectorize))
|
|
return 1;
|
|
|
|
- auto_vector_sizes sizes;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
|
|
- if (!sizes.is_empty ())
|
|
+ auto_vector_modes modes;
|
|
+ targetm.vectorize.autovectorize_vector_modes (&modes, true);
|
|
+ if (!modes.is_empty ())
|
|
{
|
|
poly_uint64 vf = 0;
|
|
- for (unsigned int i = 0; i < sizes.length (); ++i)
|
|
- vf = ordered_max (vf, sizes[i]);
|
|
+ for (unsigned int i = 0; i < modes.length (); ++i)
|
|
+ /* The returned modes use the smallest element size (and thus
|
|
+ the largest nunits) for the vectorization approach that they
|
|
+ represent. */
|
|
+ vf = ordered_max (vf, GET_MODE_NUNITS (modes[i]));
|
|
return vf;
|
|
}
|
|
|
|
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
|
|
index 813cefd69b9..7866639f76c 100644
|
|
--- a/gcc/omp-low.c
|
|
+++ b/gcc/omp-low.c
|
|
@@ -3650,11 +3650,8 @@ omp_clause_aligned_alignment (tree clause)
|
|
/* Otherwise return implementation defined alignment. */
|
|
unsigned int al = 1;
|
|
opt_scalar_mode mode_iter;
|
|
- auto_vector_sizes sizes;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
|
|
- poly_uint64 vs = 0;
|
|
- for (unsigned int i = 0; i < sizes.length (); ++i)
|
|
- vs = ordered_max (vs, sizes[i]);
|
|
+ auto_vector_modes modes;
|
|
+ targetm.vectorize.autovectorize_vector_modes (&modes, true);
|
|
static enum mode_class classes[]
|
|
= { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
|
|
for (int i = 0; i < 4; i += 2)
|
|
@@ -3665,19 +3662,18 @@ omp_clause_aligned_alignment (tree clause)
|
|
machine_mode vmode = targetm.vectorize.preferred_simd_mode (mode);
|
|
if (GET_MODE_CLASS (vmode) != classes[i + 1])
|
|
continue;
|
|
- while (maybe_ne (vs, 0U)
|
|
- && known_lt (GET_MODE_SIZE (vmode), vs)
|
|
- && GET_MODE_2XWIDER_MODE (vmode).exists ())
|
|
- vmode = GET_MODE_2XWIDER_MODE (vmode).require ();
|
|
+ machine_mode alt_vmode;
|
|
+ for (unsigned int j = 0; j < modes.length (); ++j)
|
|
+ if (related_vector_mode (modes[j], mode).exists (&alt_vmode)
|
|
+ && known_ge (GET_MODE_SIZE (alt_vmode), GET_MODE_SIZE (vmode)))
|
|
+ vmode = alt_vmode;
|
|
|
|
tree type = lang_hooks.types.type_for_mode (mode, 1);
|
|
if (type == NULL_TREE || TYPE_MODE (type) != mode)
|
|
continue;
|
|
- poly_uint64 nelts = exact_div (GET_MODE_SIZE (vmode),
|
|
- GET_MODE_SIZE (mode));
|
|
- type = build_vector_type (type, nelts);
|
|
- if (TYPE_MODE (type) != vmode)
|
|
- continue;
|
|
+ type = build_vector_type_for_mode (type, vmode);
|
|
+ /* The functions above are not allowed to return invalid modes. */
|
|
+ gcc_assert (TYPE_MODE (type) == vmode);
|
|
if (TYPE_ALIGN_UNIT (type) > al)
|
|
al = TYPE_ALIGN_UNIT (type);
|
|
}
|
|
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
|
|
index 71c73fb43cc..61de7dc283b 100644
|
|
--- a/gcc/optabs-query.c
|
|
+++ b/gcc/optabs-query.c
|
|
@@ -354,11 +354,8 @@ can_conditionally_move_p (machine_mode mode)
|
|
opt_machine_mode
|
|
qimode_for_vec_perm (machine_mode mode)
|
|
{
|
|
- machine_mode qimode;
|
|
- if (GET_MODE_INNER (mode) != QImode
|
|
- && mode_for_vector (QImode, GET_MODE_SIZE (mode)).exists (&qimode)
|
|
- && VECTOR_MODE_P (qimode))
|
|
- return qimode;
|
|
+ if (GET_MODE_INNER (mode) != QImode)
|
|
+ return related_vector_mode (mode, QImode, GET_MODE_SIZE (mode));
|
|
return opt_machine_mode ();
|
|
}
|
|
|
|
@@ -587,22 +584,21 @@ can_vec_mask_load_store_p (machine_mode mode,
|
|
if (!VECTOR_MODE_P (vmode))
|
|
return false;
|
|
|
|
- if ((targetm.vectorize.get_mask_mode
|
|
- (GET_MODE_NUNITS (vmode), GET_MODE_SIZE (vmode)).exists (&mask_mode))
|
|
+ if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
|
|
&& convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
|
|
return true;
|
|
|
|
- auto_vector_sizes vector_sizes;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
|
- for (unsigned int i = 0; i < vector_sizes.length (); ++i)
|
|
+ auto_vector_modes vector_modes;
|
|
+ targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
|
|
+ for (unsigned int i = 0; i < vector_modes.length (); ++i)
|
|
{
|
|
- poly_uint64 cur = vector_sizes[i];
|
|
+ poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]);
|
|
poly_uint64 nunits;
|
|
if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits))
|
|
continue;
|
|
if (mode_for_vector (smode, nunits).exists (&vmode)
|
|
&& VECTOR_MODE_P (vmode)
|
|
- && targetm.vectorize.get_mask_mode (nunits, cur).exists (&mask_mode)
|
|
+ && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
|
|
&& convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
|
|
return true;
|
|
}
|
|
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
|
|
index 8157798cc71..341e02bd51c 100644
|
|
--- a/gcc/optabs-tree.c
|
|
+++ b/gcc/optabs-tree.c
|
|
@@ -300,6 +300,20 @@ supportable_convert_operation (enum tree_code code,
|
|
return true;
|
|
}
|
|
|
|
+ if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
|
|
+ && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
|
|
+ {
|
|
+ *code1 = code;
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
|
|
+ && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
|
|
+ {
|
|
+ *code1 = code;
|
|
+ return true;
|
|
+ }
|
|
+
|
|
/* Now check for builtin. */
|
|
if (targetm.vectorize.builtin_conversion
|
|
&& targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
|
|
diff --git a/gcc/optabs.c b/gcc/optabs.c
|
|
index 7d7efe0a4a2..c2c1274ebdb 100644
|
|
--- a/gcc/optabs.c
|
|
+++ b/gcc/optabs.c
|
|
@@ -2095,8 +2095,8 @@ expand_twoval_binop (optab binoptab, rtx op0, rtx op1, rtx targ0, rtx targ1,
|
|
xop1 = avoid_expensive_constant (mode1, binoptab, 1, xop1, unsignedp);
|
|
|
|
create_fixed_operand (&ops[0], targ0);
|
|
- create_convert_operand_from (&ops[1], op0, mode, unsignedp);
|
|
- create_convert_operand_from (&ops[2], op1, mode, unsignedp);
|
|
+ create_convert_operand_from (&ops[1], xop0, mode, unsignedp);
|
|
+ create_convert_operand_from (&ops[2], xop1, mode, unsignedp);
|
|
create_fixed_operand (&ops[3], targ1);
|
|
if (maybe_expand_insn (icode, 4, ops))
|
|
return 1;
|
|
@@ -5486,7 +5486,7 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
|
|
struct expand_operand ops[4];
|
|
|
|
gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT
|
|
- || mode_for_int_vector (tmode).require () == smode);
|
|
+ || related_int_vector_mode (tmode).require () == smode);
|
|
create_output_operand (&ops[0], target, tmode);
|
|
create_input_operand (&ops[3], sel, smode);
|
|
|
|
@@ -5611,8 +5611,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
|
|
/* The optabs are only defined for selectors with the same width
|
|
as the values being permuted. */
|
|
machine_mode required_sel_mode;
|
|
- if (!mode_for_int_vector (mode).exists (&required_sel_mode)
|
|
- || !VECTOR_MODE_P (required_sel_mode))
|
|
+ if (!related_int_vector_mode (mode).exists (&required_sel_mode))
|
|
{
|
|
delete_insns_since (last);
|
|
return NULL_RTX;
|
|
diff --git a/gcc/params.def b/gcc/params.def
|
|
index 3f18642475a..b269045fb9c 100644
|
|
--- a/gcc/params.def
|
|
+++ b/gcc/params.def
|
|
@@ -1403,7 +1403,7 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS,
|
|
DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
|
|
"vect-epilogues-nomask",
|
|
"Enable loop epilogue vectorization using smaller vector size.",
|
|
- 0, 0, 1)
|
|
+ 1, 0, 1)
|
|
|
|
DEFPARAM(PARAM_UNROLL_JAM_MIN_PERCENT,
|
|
"unroll-jam-min-percent",
|
|
diff --git a/gcc/poly-int.h b/gcc/poly-int.h
|
|
index d68a652b5fa..ba39ca471be 100644
|
|
--- a/gcc/poly-int.h
|
|
+++ b/gcc/poly-int.h
|
|
@@ -1528,6 +1528,29 @@ constant_lower_bound (const poly_int_pod<N, Ca> &a)
|
|
return a.coeffs[0];
|
|
}
|
|
|
|
+/* Return the constant lower bound of A, given that it is no less than B. */
|
|
+
|
|
+template<unsigned int N, typename Ca, typename Cb>
|
|
+inline POLY_CONST_COEFF (Ca, Cb)
|
|
+constant_lower_bound_with_limit (const poly_int_pod<N, Ca> &a, const Cb &b)
|
|
+{
|
|
+ if (known_ge (a, b))
|
|
+ return a.coeffs[0];
|
|
+ return b;
|
|
+}
|
|
+
|
|
+/* Return the constant upper bound of A, given that it is no greater
|
|
+ than B. */
|
|
+
|
|
+template<unsigned int N, typename Ca, typename Cb>
|
|
+inline POLY_CONST_COEFF (Ca, Cb)
|
|
+constant_upper_bound_with_limit (const poly_int_pod<N, Ca> &a, const Cb &b)
|
|
+{
|
|
+ if (known_le (a, b))
|
|
+ return a.coeffs[0];
|
|
+ return b;
|
|
+}
|
|
+
|
|
/* Return a value that is known to be no greater than A and B. This
|
|
will be the greatest lower bound for some indeterminate values but
|
|
not necessarily for all. */
|
|
diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
|
|
index ebd69bde531..1af51f686c7 100644
|
|
--- a/gcc/read-rtl.c
|
|
+++ b/gcc/read-rtl.c
|
|
@@ -1282,7 +1282,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table,
|
|
m = add_mapping (&substs, subst_iters_table, attr_operands[1]);
|
|
end_ptr = &m->values;
|
|
end_ptr = add_map_value (end_ptr, 1, "");
|
|
- end_ptr = add_map_value (end_ptr, 2, "");
|
|
+ add_map_value (end_ptr, 2, "");
|
|
|
|
add_define_attr_for_define_subst (attr_operands[1], queue);
|
|
}
|
|
@@ -1290,7 +1290,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table,
|
|
m = add_mapping (&substs, subst_attrs_table, attr_operands[0]);
|
|
end_ptr = &m->values;
|
|
end_ptr = add_map_value (end_ptr, 1, attr_operands[2]);
|
|
- end_ptr = add_map_value (end_ptr, 2, attr_operands[3]);
|
|
+ add_map_value (end_ptr, 2, attr_operands[3]);
|
|
}
|
|
|
|
/* Check newly-created code iterator ITERATOR to see whether every code has the
|
|
diff --git a/gcc/regrename.c b/gcc/regrename.c
|
|
index 637b3cbe6d7..5259d565e58 100644
|
|
--- a/gcc/regrename.c
|
|
+++ b/gcc/regrename.c
|
|
@@ -1426,10 +1426,9 @@ scan_rtx (rtx_insn *insn, rtx *loc, enum reg_class cl, enum scan_actions action,
|
|
{
|
|
const char *fmt;
|
|
rtx x = *loc;
|
|
- enum rtx_code code = GET_CODE (x);
|
|
int i, j;
|
|
|
|
- code = GET_CODE (x);
|
|
+ enum rtx_code code = GET_CODE (x);
|
|
switch (code)
|
|
{
|
|
case CONST:
|
|
diff --git a/gcc/reorg.c b/gcc/reorg.c
|
|
index 81349382b81..bdfcf8851cd 100644
|
|
--- a/gcc/reorg.c
|
|
+++ b/gcc/reorg.c
|
|
@@ -2708,14 +2708,13 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
|
|
&& GET_CODE (PATTERN (new_thread)) != ASM_INPUT
|
|
&& asm_noperands (PATTERN (new_thread)) < 0)
|
|
{
|
|
- rtx pat = PATTERN (new_thread);
|
|
rtx dest;
|
|
rtx src;
|
|
|
|
/* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
|
|
above. */
|
|
trial = as_a <rtx_insn *> (new_thread);
|
|
- pat = PATTERN (trial);
|
|
+ rtx pat = PATTERN (trial);
|
|
|
|
if (!NONJUMP_INSN_P (trial)
|
|
|| GET_CODE (pat) != SET
|
|
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
|
|
index 50bbb79655b..bdbd1b98eba 100644
|
|
--- a/gcc/simplify-rtx.c
|
|
+++ b/gcc/simplify-rtx.c
|
|
@@ -6709,6 +6709,17 @@ simplify_subreg (machine_mode outermode, rtx op,
|
|
}
|
|
}
|
|
|
|
+ /* If OP is a vector comparison and the subreg is not changing the
|
|
+ number of elements or the size of the elements, change the result
|
|
+ of the comparison to the new mode. */
|
|
+ if (COMPARISON_P (op)
|
|
+ && VECTOR_MODE_P (outermode)
|
|
+ && VECTOR_MODE_P (innermode)
|
|
+ && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
|
|
+ && known_eq (GET_MODE_UNIT_SIZE (outermode),
|
|
+ GET_MODE_UNIT_SIZE (innermode)))
|
|
+ return simplify_gen_relational (GET_CODE (op), outermode, innermode,
|
|
+ XEXP (op, 0), XEXP (op, 1));
|
|
return NULL_RTX;
|
|
}
|
|
|
|
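The new simplify-rtx.c rule above can be read as the following rewrite (illustrative RTL only; V4SF and V4SI are used because they have the same number of elements and the same element size):

  (subreg:V4SF (eq:V4SI (reg:V4SF 90) (reg:V4SF 91)) 0)
    -->
  (eq:V4SF (reg:V4SF 90) (reg:V4SF 91))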
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
|
|
index 5d6f2e0166c..a054b7887e7 100644
|
|
--- a/gcc/stor-layout.c
|
|
+++ b/gcc/stor-layout.c
|
|
@@ -514,18 +514,43 @@ mode_for_vector (scalar_mode innermode, poly_uint64 nunits)
|
|
return opt_machine_mode ();
|
|
}
|
|
|
|
-/* Return the mode for a vector that has NUNITS integer elements of
|
|
- INT_BITS bits each, if such a mode exists. The mode can be either
|
|
- an integer mode or a vector mode. */
|
|
+/* If a piece of code is using vector mode VECTOR_MODE and also wants
|
|
+ to operate on elements of mode ELEMENT_MODE, return the vector mode
|
|
+ it should use for those elements. If NUNITS is nonzero, ensure that
|
|
+ the mode has exactly NUNITS elements, otherwise pick whichever vector
|
|
+ size pairs the most naturally with VECTOR_MODE; this may mean choosing
|
|
+ a mode with a different size and/or number of elements, depending on
|
|
+ what the target prefers. Return an empty opt_machine_mode if there
|
|
+ is no supported vector mode with the required properties.
|
|
+
|
|
+ Unlike mode_for_vector, any returned mode is guaranteed to satisfy
|
|
+ both VECTOR_MODE_P and targetm.vector_mode_supported_p. */
|
|
|
|
opt_machine_mode
|
|
-mode_for_int_vector (unsigned int int_bits, poly_uint64 nunits)
|
|
+related_vector_mode (machine_mode vector_mode, scalar_mode element_mode,
|
|
+ poly_uint64 nunits)
|
|
{
|
|
+ gcc_assert (VECTOR_MODE_P (vector_mode));
|
|
+ return targetm.vectorize.related_mode (vector_mode, element_mode, nunits);
|
|
+}
|
|
+
|
|
+/* If a piece of code is using vector mode VECTOR_MODE and also wants
|
|
+ to operate on integer vectors with the same element size and number
|
|
+ of elements, return the vector mode it should use. Return an empty
|
|
+ opt_machine_mode if there is no supported vector mode with the
|
|
+ required properties.
|
|
+
|
|
+ Unlike mode_for_vector, any returned mode is guaranteed to satisfy
|
|
+ both VECTOR_MODE_P and targetm.vector_mode_supported_p. */
|
|
+
|
|
+opt_machine_mode
|
|
+related_int_vector_mode (machine_mode vector_mode)
|
|
+{
|
|
+ gcc_assert (VECTOR_MODE_P (vector_mode));
|
|
scalar_int_mode int_mode;
|
|
- machine_mode vec_mode;
|
|
- if (int_mode_for_size (int_bits, 0).exists (&int_mode)
|
|
- && mode_for_vector (int_mode, nunits).exists (&vec_mode))
|
|
- return vec_mode;
|
|
+ if (int_mode_for_mode (GET_MODE_INNER (vector_mode)).exists (&int_mode))
|
|
+ return related_vector_mode (vector_mode, int_mode,
|
|
+ GET_MODE_NUNITS (vector_mode));
|
|
return opt_machine_mode ();
|
|
}
|
|
|
|
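A short sketch of how the two new queries defined above behave with the default hook (the concrete modes are examples only and depend on the vector modes the target supports):

  /* Integer vector with the same element size and element count:
     for example V4SFmode -> V4SImode.  */
  opt_machine_mode m1 = related_int_vector_mode (V4SFmode);

  /* New element mode, same total size by default: with a 16-byte vector
     mode and HImode elements this would give V8HImode.  */
  opt_machine_mode m2 = related_vector_mode (V16QImode, HImode);

  /* Explicit element count: exactly 4 HImode elements, i.e. V4HImode
     if the target supports it.  */
  opt_machine_mode m3 = related_vector_mode (V16QImode, HImode, 4);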
diff --git a/gcc/target.def b/gcc/target.def
|
|
index 66cee075018..f998470fffd 100644
|
|
--- a/gcc/target.def
|
|
+++ b/gcc/target.def
|
|
@@ -1894,33 +1894,80 @@ reached. The default is @var{mode} which means no splitting.",
|
|
/* Returns a mask of vector sizes to iterate over when auto-vectorizing
|
|
after processing the preferred one derived from preferred_simd_mode. */
|
|
DEFHOOK
|
|
-(autovectorize_vector_sizes,
|
|
- "If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not\n\
|
|
-the only one that is worth considering, this hook should add all suitable\n\
|
|
-vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
|
|
-one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
|
|
+(autovectorize_vector_modes,
|
|
+ "If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}\n\
|
|
+is not the only approach worth considering, this hook should add one mode to\n\
|
|
+@var{modes} for each useful alternative approach. These modes are then\n\
|
|
+passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode\n\
|
|
+for a given element mode.\n\
|
|
+\n\
|
|
+The modes returned in @var{modes} should use the smallest element mode\n\
|
|
+possible for the vectorization approach that they represent, preferring\n\
|
|
+integer modes over floating-point modes in the event of a tie. The first\n\
|
|
+mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its\n\
|
|
+element mode.\n\
|
|
+\n\
|
|
+If @var{all} is true, add suitable vector modes even when they are generally\n\
|
|
+not expected to be worthwhile.\n\
|
|
+\n\
|
|
+The hook returns a bitmask of flags that control how the modes in\n\
|
|
+@var{modes} are used. The flags are:\n\
|
|
+@table @code\n\
|
|
+@item VECT_COMPARE_COSTS\n\
|
|
+Tells the loop vectorizer to try all the provided modes and pick the one\n\
|
|
+with the lowest cost. By default the vectorizer will choose the first\n\
|
|
+mode that works.\n\
|
|
+@end table\n\
|
|
\n\
|
|
The hook does not need to do anything if the vector returned by\n\
|
|
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
|
|
-for autovectorization. The default implementation does nothing.",
|
|
- void,
|
|
- (vector_sizes *sizes),
|
|
- default_autovectorize_vector_sizes)
|
|
+for autovectorization. The default implementation adds no modes and\n\
|
|
+returns 0.",
|
|
+ unsigned int,
|
|
+ (vector_modes *modes, bool all),
|
|
+ default_autovectorize_vector_modes)
|
|
+
|
|
+DEFHOOK
|
|
+(related_mode,
|
|
+ "If a piece of code is using vector mode @var{vector_mode} and also wants\n\
|
|
+to operate on elements of mode @var{element_mode}, return the vector mode\n\
|
|
+it should use for those elements. If @var{nunits} is nonzero, ensure that\n\
|
|
+the mode has exactly @var{nunits} elements, otherwise pick whichever vector\n\
|
|
+size pairs the most naturally with @var{vector_mode}. Return an empty\n\
|
|
+@code{opt_machine_mode} if there is no supported vector mode with the\n\
|
|
+required properties.\n\
|
|
+\n\
|
|
+There is no prescribed way of handling the case in which @var{nunits}\n\
|
|
+is zero. One common choice is to pick a vector mode with the same size\n\
|
|
+as @var{vector_mode}; this is the natural choice if the target has a\n\
|
|
+fixed vector size. Another option is to choose a vector mode with the\n\
|
|
+same number of elements as @var{vector_mode}; this is the natural choice\n\
|
|
+if the target has a fixed number of elements. Alternatively, the hook\n\
|
|
+might choose a middle ground, such as trying to keep the number of\n\
|
|
+elements as similar as possible while applying maximum and minimum\n\
|
|
+vector sizes.\n\
|
|
+\n\
|
|
+The default implementation uses @code{mode_for_vector} to find the\n\
|
|
+requested mode, returning a mode with the same size as @var{vector_mode}\n\
|
|
+when @var{nunits} is zero. This is the correct behavior for most targets.",
|
|
+ opt_machine_mode,
|
|
+ (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits),
|
|
+ default_vectorize_related_mode)
|
|
|
|
/* Function to get a target mode for a vector mask. */
|
|
DEFHOOK
|
|
(get_mask_mode,
|
|
- "A vector mask is a value that holds one boolean result for every element\n\
|
|
-in a vector. This hook returns the machine mode that should be used to\n\
|
|
-represent such a mask when the vector in question is @var{length} bytes\n\
|
|
-long and contains @var{nunits} elements. The hook returns an empty\n\
|
|
-@code{opt_machine_mode} if no such mode exists.\n\
|
|
-\n\
|
|
-The default implementation returns the mode of an integer vector that\n\
|
|
-is @var{length} bytes long and that contains @var{nunits} elements,\n\
|
|
-if such a mode exists.",
|
|
+ "Return the mode to use for a vector mask that holds one boolean\n\
|
|
+result for each element of vector mode @var{mode}. The returned mask mode\n\
|
|
+can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of\n\
|
|
+booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class\n\
|
|
+@code{MODE_INT}). Return an empty @code{opt_machine_mode} if no such\n\
|
|
+mask mode exists.\n\
|
|
+\n\
|
|
+The default implementation returns a @code{MODE_VECTOR_INT} with the\n\
|
|
+same size and number of elements as @var{mode}, if such a mode exists.",
|
|
opt_machine_mode,
|
|
- (poly_uint64 nunits, poly_uint64 length),
|
|
+ (machine_mode mode),
|
|
default_get_mask_mode)
|
|
|
|
/* Function to say whether a masked operation is expensive when the
|
|
diff --git a/gcc/target.h b/gcc/target.h
|
|
index 008932b5dbd..057e6ae8768 100644
|
|
--- a/gcc/target.h
|
|
+++ b/gcc/target.h
|
|
@@ -199,11 +199,19 @@ enum vect_cost_model_location {
|
|
class vec_perm_indices;
|
|
|
|
/* The type to use for lists of vector sizes. */
|
|
-typedef vec<poly_uint64> vector_sizes;
|
|
+typedef vec<machine_mode> vector_modes;
|
|
|
|
/* Same, but can be used to construct local lists that are
|
|
automatically freed. */
|
|
-typedef auto_vec<poly_uint64, 8> auto_vector_sizes;
|
|
+typedef auto_vec<machine_mode, 8> auto_vector_modes;
|
|
+
|
|
+/* Flags returned by TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES:
|
|
+
|
|
+ VECT_COMPARE_COSTS
|
|
+ Tells the loop vectorizer to try all the provided modes and
|
|
+ pick the one with the lowest cost. By default the vectorizer
|
|
+ will choose the first mode that works. */
|
|
+const unsigned int VECT_COMPARE_COSTS = 1U << 0;
|
|
|
|
/* The target structure. This holds all the backend hooks. */
|
|
#define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME;
|
|
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
|
|
index 02b9dc59611..6396f6f4bdf 100644
|
|
--- a/gcc/targhooks.c
|
|
+++ b/gcc/targhooks.c
|
|
@@ -1312,32 +1312,39 @@ default_split_reduction (machine_mode mode)
|
|
return mode;
|
|
}
|
|
|
|
-/* By default only the size derived from the preferred vector mode
|
|
- is tried. */
|
|
+/* By default only the preferred vector mode is tried. */
|
|
|
|
-void
|
|
-default_autovectorize_vector_sizes (vector_sizes *)
|
|
+unsigned int
|
|
+default_autovectorize_vector_modes (vector_modes *, bool)
|
|
{
|
|
+ return 0;
|
|
}
|
|
|
|
-/* By default a vector of integers is used as a mask. */
|
|
+/* The default implementation of TARGET_VECTORIZE_RELATED_MODE. */
|
|
|
|
opt_machine_mode
|
|
-default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
|
|
-{
|
|
- unsigned int elem_size = vector_element_size (vector_size, nunits);
|
|
- scalar_int_mode elem_mode
|
|
- = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
|
|
- machine_mode vector_mode;
|
|
+default_vectorize_related_mode (machine_mode vector_mode,
|
|
+ scalar_mode element_mode,
|
|
+ poly_uint64 nunits)
|
|
+{
|
|
+ machine_mode result_mode;
|
|
+ if ((maybe_ne (nunits, 0U)
|
|
+ || multiple_p (GET_MODE_SIZE (vector_mode),
|
|
+ GET_MODE_SIZE (element_mode), &nunits))
|
|
+ && mode_for_vector (element_mode, nunits).exists (&result_mode)
|
|
+ && VECTOR_MODE_P (result_mode)
|
|
+ && targetm.vector_mode_supported_p (result_mode))
|
|
+ return result_mode;
|
|
|
|
- gcc_assert (known_eq (elem_size * nunits, vector_size));
|
|
+ return opt_machine_mode ();
|
|
+}
|
|
|
|
- if (mode_for_vector (elem_mode, nunits).exists (&vector_mode)
|
|
- && VECTOR_MODE_P (vector_mode)
|
|
- && targetm.vector_mode_supported_p (vector_mode))
|
|
- return vector_mode;
|
|
+/* By default a vector of integers is used as a mask. */
|
|
|
|
- return opt_machine_mode ();
|
|
+opt_machine_mode
|
|
+default_get_mask_mode (machine_mode mode)
|
|
+{
|
|
+ return related_int_vector_mode (mode);
|
|
}
|
|
|
|
/* By default consider masked stores to be expensive. */
|
|
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
|
|
index 59436278dcf..2d599190891 100644
|
|
--- a/gcc/targhooks.h
|
|
+++ b/gcc/targhooks.h
|
|
@@ -110,8 +110,11 @@ default_builtin_support_vector_misalignment (machine_mode mode,
|
|
int, bool);
|
|
extern machine_mode default_preferred_simd_mode (scalar_mode mode);
|
|
extern machine_mode default_split_reduction (machine_mode);
|
|
-extern void default_autovectorize_vector_sizes (vector_sizes *);
|
|
-extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
|
|
+extern unsigned int default_autovectorize_vector_modes (vector_modes *, bool);
|
|
+extern opt_machine_mode default_vectorize_related_mode (machine_mode,
|
|
+ scalar_mode,
|
|
+ poly_uint64);
|
|
+extern opt_machine_mode default_get_mask_mode (machine_mode);
|
|
extern bool default_empty_mask_is_expensive (unsigned);
|
|
extern void *default_init_cost (struct loop *);
|
|
extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
|
|
diff --git a/gcc/testsuite/g++.dg/opt/pr92317.C b/gcc/testsuite/g++.dg/opt/pr92317.C
|
|
new file mode 100644
|
|
index 00000000000..2bb9729fc96
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/g++.dg/opt/pr92317.C
|
|
@@ -0,0 +1,51 @@
|
|
+// Copied from pr87967.C
|
|
+// { dg-do compile { target c++11 } }
|
|
+// { dg-options "-O2 -ftree-vectorize -fno-tree-pre --param vect-epilogues-nomask=1" }
|
|
+
|
|
+void h();
|
|
+template <typename b> struct k { using d = b; };
|
|
+template <typename b, template <typename> class> using e = k<b>;
|
|
+template <typename b, template <typename> class f>
|
|
+using g = typename e<b, f>::d;
|
|
+struct l {
|
|
+ template <typename i> using ab = typename i::j;
|
|
+};
|
|
+struct n : l {
|
|
+ using j = g<char *, ab>;
|
|
+};
|
|
+class o {
|
|
+public:
|
|
+ long r();
|
|
+};
|
|
+char m;
|
|
+char s() {
|
|
+ if (m)
|
|
+ return '0';
|
|
+ return 'A';
|
|
+}
|
|
+class t {
|
|
+public:
|
|
+ typedef char *ad;
|
|
+ ad m_fn2();
|
|
+};
|
|
+void fn3() {
|
|
+ char *a;
|
|
+ t b;
|
|
+ bool p = false;
|
|
+ while (*a) {
|
|
+ h();
|
|
+ o c;
|
|
+ if (*a)
|
|
+ a++;
|
|
+ if (c.r()) {
|
|
+ n::j q;
|
|
+ for (t::ad d = b.m_fn2(), e; d != e; d++) {
|
|
+ char f = *q;
|
|
+ *d = f + s();
|
|
+ }
|
|
+ p = true;
|
|
+ }
|
|
+ }
|
|
+ if (p)
|
|
+ throw;
|
|
+}
|
|
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr90883.C b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C
|
|
new file mode 100644
|
|
index 00000000000..0e622f263d2
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C
|
|
@@ -0,0 +1,20 @@
|
|
+// { dg-options "-O2 -Os -fdump-tree-dse-details -std=c++11 --param max-inline-insns-size=1" }
|
|
+
|
|
+
|
|
+ class C
|
|
+ {
|
|
+ char a[7]{};
|
|
+ int b{};
|
|
+ };
|
|
+
|
|
+ C slow()
|
|
+ {
|
|
+ return {};
|
|
+ }
|
|
+
|
|
+
|
|
+// We want to match enough here to capture that we deleted an empty
|
|
+// constructor store
|
|
+// aarch64 and mips will expand to loop to clear because CLEAR_RATIO.
|
|
+// { dg-final { scan-tree-dump "Deleted redundant store: .*\.a = {}" "dse1" { xfail { aarch64-*-* mips*-*-* } } } }
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/pr92162.c b/gcc/testsuite/gcc.dg/pr92162.c
|
|
new file mode 100644
|
|
index 00000000000..ed82595a752
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/pr92162.c
|
|
@@ -0,0 +1,10 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-Ofast" } */
|
|
+
|
|
+short int s8;
|
|
+
|
|
+void __attribute__ ((simd))
|
|
+gn (void)
|
|
+{
|
|
+ s8 = 0;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr91896.c b/gcc/testsuite/gcc.dg/torture/pr91896.c
|
|
new file mode 100644
|
|
index 00000000000..e728538bb9a
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr91896.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+unsigned int
|
|
+zj (unsigned int et)
|
|
+{
|
|
+ signed char jr = 0;
|
|
+
|
|
+ do {
|
|
+ et *= 3;
|
|
+ jr += 2;
|
|
+ } while (jr >= 0);
|
|
+
|
|
+ if (et == (unsigned int) jr)
|
|
+ et = 0;
|
|
+
|
|
+ return et;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr92069.c b/gcc/testsuite/gcc.dg/torture/pr92069.c
|
|
new file mode 100644
|
|
index 00000000000..806ff5fba14
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92069.c
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+unsigned int a, c, d;
|
|
+double b;
|
|
+void e()
|
|
+{
|
|
+ for (; d; d++)
|
|
+ {
|
|
+ double f;
|
|
+ a = 2;
|
|
+ for (; a; a++)
|
|
+ {
|
|
+ c = b;
|
|
+ b = f;
|
|
+ f = c;
|
|
+ }
|
|
+ }
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr92173.c b/gcc/testsuite/gcc.dg/torture/pr92173.c
|
|
new file mode 100644
|
|
index 00000000000..fcb3548b716
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92173.c
|
|
@@ -0,0 +1,11 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+unsigned int
|
|
+yo (unsigned int o0, signed char s1)
|
|
+{
|
|
+ for (s1 = 0; s1 < 1; s1 -= 2)
|
|
+ o0 += o0;
|
|
+
|
|
+ return o0 + s1;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr92241.c b/gcc/testsuite/gcc.dg/torture/pr92241.c
|
|
new file mode 100644
|
|
index 00000000000..331d03b3d44
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92241.c
|
|
@@ -0,0 +1,13 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+int a, b;
|
|
+char c[2];
|
|
+void d() {
|
|
+ char e;
|
|
+ for (; b; b--) {
|
|
+ e = 0;
|
|
+ for (; e <= 1; e++)
|
|
+ a &= c[b + e] && 1;
|
|
+ }
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr92275.c b/gcc/testsuite/gcc.dg/torture/pr92275.c
|
|
new file mode 100644
|
|
index 00000000000..b9f70889758
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92275.c
|
|
@@ -0,0 +1,13 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+unsigned long a, c;
|
|
+int *b, *b2;
|
|
+long d;
|
|
+
|
|
+void fn1()
|
|
+{
|
|
+ for (; b < b2; b++)
|
|
+ d += *b * c;
|
|
+ d *= a;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr92371.c b/gcc/testsuite/gcc.dg/torture/pr92371.c
|
|
new file mode 100644
|
|
index 00000000000..0c78d32f471
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92371.c
|
|
@@ -0,0 +1,12 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-additional-options "-ftree-vectorize" } */
|
|
+
|
|
+int a, b;
|
|
+void d()
|
|
+{
|
|
+ int c = sizeof(int);
|
|
+ for (; a; a++)
|
|
+ c *= sizeof(int);
|
|
+ c *= sizeof(int);
|
|
+ b = c;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c
|
|
new file mode 100644
|
|
index 00000000000..23a53bb4ad2
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c
|
|
@@ -0,0 +1,65 @@
|
|
+/* { dg-options "-O2 -fdump-tree-dse-details -fno-tree-fre" } */
|
|
+#include <string.h>
|
|
+#include <stdlib.h>
|
|
+
|
|
+struct X
|
|
+{
|
|
+ char mem0[10];
|
|
+ char mem1[10];
|
|
+};
|
|
+
|
|
+
|
|
+void blah (struct X);
|
|
+
|
|
+
|
|
+void
|
|
+foo1()
|
|
+{
|
|
+ struct X x = { };
|
|
+ memset (x.mem1, 0, sizeof x.mem1);
|
|
+ blah (x);
|
|
+}
|
|
+
|
|
+void
|
|
+foo2()
|
|
+{
|
|
+ struct X x = { };
|
|
+ x.mem1[5] = 0;
|
|
+ blah (x);
|
|
+}
|
|
+
|
|
+void
|
|
+bar1 ()
|
|
+{
|
|
+ struct X x;
|
|
+ memset (&x, 0, sizeof x);
|
|
+ memset (&x.mem1, 0, sizeof x.mem1);
|
|
+ blah (x);
|
|
+}
|
|
+void
|
|
+bar2 ()
|
|
+{
|
|
+ struct X x;
|
|
+ memset (&x, 0, sizeof x);
|
|
+ x.mem1[5] = 0;
|
|
+ blah (x);
|
|
+}
|
|
+
|
|
+void
|
|
+baz1 ()
|
|
+{
|
|
+ struct X *x = calloc (sizeof (struct X), 1);
|
|
+ memset (&x->mem1, 0, sizeof x->mem1);
|
|
+ blah (*x);
|
|
+}
|
|
+
|
|
+void
|
|
+baz2 ()
|
|
+{
|
|
+ struct X *x = calloc (sizeof (struct X), 1);
|
|
+ x->mem1[5] = 0;
|
|
+ blah (*x);
|
|
+}
|
|
+/* { dg-final { scan-tree-dump-times "Deleted redundant call" 3 "dse1" } } */
|
|
+/* { dg-final { scan-tree-dump-times "Deleted redundant store" 3 "dse1" } } */
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
|
|
new file mode 100644
|
|
index 00000000000..715c22ac6c6
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
|
|
@@ -0,0 +1,14 @@
|
|
+/* { dg-do compile } */
|
|
+
|
|
+int x[4], y[4], z[4];
|
|
+
|
|
+void
|
|
+f (void)
|
|
+{
|
|
+ x[0] += y[0] / z[0] * 2;
|
|
+ x[1] += y[1] / z[1] * 2;
|
|
+ x[2] += y[2] / z[2] * 2;
|
|
+ x[3] += y[3] / z[3] * 2;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_int } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
|
|
index 85f9a02582f..813b1af089a 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
|
|
@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2)
|
|
}
|
|
|
|
/* Disable for SVE because for long or variable-length vectors we don't
|
|
- get an unrolled epilogue loop. */
|
|
-/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */
|
|
+ get an unrolled epilogue loop. Also disable for AArch64 Advanced SIMD,
|
|
+ because there we can vectorize the epilogue using mixed vector sizes. */
|
|
+/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
|
|
index 228190ab05d..877de4eb5be 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
#include "tree-vect.h"
|
|
|
|
extern float copysignf (float, float);
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
index 7a148e41d51..5f871289337 100644
--- a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
+++ b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_float_strict } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
index 1d674504e2c..022d49f1175 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
index e4202b10d06..b5f8c3c88e4 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
@@ -46,4 +46,4 @@ int main (void)
}

/* Until we support multiple types in the inner loop */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
index 50b4998bb6c..7049e4936b9 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */

diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
index c3b242157ce..d2ae7976781 100644
--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */
/* { dg-add-options bind_pic_locally } */

diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
index 470bbfb5537..243e01e6dad 100644
--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */
/* { dg-add-options bind_pic_locally } */

diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
index 805024d8058..e339590bacb 100644
--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
index 726c0de652f..c403a8302d8 100644
--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */
/* { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} } */
/* { dg-add-options bind_pic_locally } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
index 4513c40b34f..dcb53701795 100644
--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/pr33804.c b/gcc/testsuite/gcc.dg/vect/pr33804.c
index 86babbe60e7..0db13674b42 100644
--- a/gcc/testsuite/gcc.dg/vect/pr33804.c
+++ b/gcc/testsuite/gcc.dg/vect/pr33804.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */

diff --git a/gcc/testsuite/gcc.dg/vect/pr53773.c b/gcc/testsuite/gcc.dg/vect/pr53773.c
index 0bcc021767e..7f8229571ec 100644
--- a/gcc/testsuite/gcc.dg/vect/pr53773.c
+++ b/gcc/testsuite/gcc.dg/vect/pr53773.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-do compile } */
/* { dg-additional-options "-fdump-tree-optimized" } */

diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-1.c b/gcc/testsuite/gcc.dg/vect/pr65930-1.c
new file mode 100644
index 00000000000..895fbf8869d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65930-1.c
@@ -0,0 +1,26 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+unsigned __attribute__((noipa))
+bar (unsigned int *x)
+{
+ int sum = 4;
+ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+ for (int i = 0; i < 16; ++i)
+ sum += x[i];
+ return sum;
+}
+
+int
+main()
+{
+ static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
+ = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
+ check_vect ();
+ if (bar (a) != 260)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-2.c b/gcc/testsuite/gcc.dg/vect/pr65930-2.c
new file mode 100644
index 00000000000..9cfb9b102d9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65930-2.c
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int __attribute__((noipa))
+bar (unsigned int *x, int n)
+{
+ int sum = 4;
+ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+ for (int i = 0; i < n; ++i)
+ sum += x[i*4+0]+ x[i*4 + 1] + x[i*4 + 2] + x[i*4 + 3];
+ return sum;
+}
+
+int
+main ()
+{
+ static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
+ = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
+ check_vect ();
+ if (bar (a, 4) != 260)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 879819d576a..9fc74a1ab28 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_condition } */

#include "tree-vect.h"
@@ -41,5 +43,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index f37aecab082..e4a1d9419c2 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -42,6 +42,6 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
index b84fd41bc63..a47f4146a29 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -42,5 +42,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
index e1d3ff52f5c..a703923151d 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_condition } */

#include "tree-vect.h"
@@ -41,5 +43,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
index 9f1e4e1eb6a..3b76fda2122 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_condition } */

#include "tree-vect.h"
@@ -41,5 +43,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index 18d33c436a5..58ba5f764d0 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -42,5 +42,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index 427abdb4140..6b4077e1a62 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -52,5 +52,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 186e03a6346..471fbe2da21 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_condition } */

#include "tree-vect.h"
@@ -41,6 +43,6 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index c91b648aa05..4e3f765cd0c 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -53,5 +53,5 @@ main (void)
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index b072c8d33a2..dde96d7a553 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -41,5 +41,5 @@ main (void)
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
index e43e0e473be..1f295306016 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
@@ -48,5 +48,5 @@ main ()
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */
/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
index f2405198a10..cbb9a6ff69a 100644
--- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* PR tree-optimization/80631 */

#include "tree-vect.h"
@@ -72,5 +74,5 @@ main ()
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
index b334ca2345b..61e11316af2 100644
--- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* PR tree-optimization/80631 */

#include "tree-vect.h"
@@ -72,5 +74,5 @@ main ()
}

/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr92205.c b/gcc/testsuite/gcc.dg/vect/pr92205.c
new file mode 100644
index 00000000000..a031c1fe297
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr92205.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int b(int n, unsigned char *a)
+{
+ int d = 0;
+ a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__);
+ for (int c = 0; c < n; ++c)
+ d |= a[c];
+ return d;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c
index 7d330c787d1..d7c67fe2c6e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-23.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-23.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/slp-25.c b/gcc/testsuite/gcc.dg/vect/slp-25.c
index ff7eff202cb..1c33927c434 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-25.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-25.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */
/* { dg-add-options bind_pic_locally } */

diff --git a/gcc/testsuite/gcc.dg/vect/slp-9.c b/gcc/testsuite/gcc.dg/vect/slp-9.c
index d0c94f1986b..d5212dca3dd 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-9.c
@@ -44,5 +44,5 @@ int main (void)
}

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_widen_mult_hi_to_si } } } */

diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
index 07c96c00eb0..15dd59922fc 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
index fc689e46ba1..f457c11aa3c 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
index 88591c5bdcb..1fd15aa3c87 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c
new file mode 100644
index 00000000000..7d9255e48f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c
@@ -0,0 +1,31 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_usad_char } */
+/* With AVX256 or more we do not pull off the trick eliding the epilogue. */
+/* { dg-additional-options "-mprefer-avx128" { target { x86_64-*-* i?86-*-* } } } */
+
+typedef unsigned char uint8_t;
+int x264_pixel_sad_8x8( uint8_t *pix1, uint8_t *pix2, int i_stride_pix2 )
+{
+ int i_sum = 0;
+ for( int y = 0; y < 8; y++ )
+ {
+ i_sum += __builtin_abs( pix1[0] - pix2[0] );
+ i_sum += __builtin_abs( pix1[1] - pix2[1] );
+ i_sum += __builtin_abs( pix1[2] - pix2[2] );
+ i_sum += __builtin_abs( pix1[3] - pix2[3] );
+ i_sum += __builtin_abs( pix1[4] - pix2[4] );
+ i_sum += __builtin_abs( pix1[5] - pix2[5] );
+ i_sum += __builtin_abs( pix1[6] - pix2[6] );
+ i_sum += __builtin_abs( pix1[7] - pix2[7] );
+ pix1 += 16;
+ pix2 += i_stride_pix2;
+ }
+ return i_sum;
+}
+
+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-not "access with gaps requires scalar epilogue loop" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
index f5fb63e19f1..e3bfee33348 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-require-effective-target vect_int } */

#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
index 4460d59b5a1..abb10fde45b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
@@ -38,5 +38,5 @@ int main (void)
}

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */

diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
index 6e72c4878c2..0756119afb4 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
@@ -38,5 +38,5 @@ int main (void)
}

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */

diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
index 8a57eb69a91..f09c964fdc1 100644
--- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
@@ -1,3 +1,5 @@
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */

diff --git a/gcc/testsuite/gcc.dg/vect/vect-103.c b/gcc/testsuite/gcc.dg/vect/vect-103.c
|
|
index 4a9e1574eb0..2a4510482d4 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-103.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-103.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdlib.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-109.c b/gcc/testsuite/gcc.dg/vect/vect-109.c
|
|
index 9a507105899..ac5d0827899 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-109.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-109.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-skip-if "" { vect_no_align } } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-119.c b/gcc/testsuite/gcc.dg/vect/vect-119.c
|
|
index aa8c3002bff..29a9c51cd29 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-119.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-119.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-24.c b/gcc/testsuite/gcc.dg/vect/vect-24.c
|
|
index cbff6c55fa4..fa4c0620d29 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-24.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-24.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c
|
|
index 4f0472b5d0f..8a141f38400 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-26.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-26.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c
|
|
index 590217feee7..ac86b21aceb 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-27.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-27.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c
|
|
index 86ec2cc1ddf..bbd446dfe63 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-29.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-29.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c
|
|
index a65b4a62276..086cbf20c0a 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-42.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-42.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c
|
|
index 03ef2c0f671..f7f1fd28665 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-44.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-44.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c
|
|
index bac6ef6b8dd..b29fe47635a 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-48.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-48.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c
|
|
index c9500ca91e5..f43676896af 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-50.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-50.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c
|
|
index 0343d9a24d1..c20a4be2ede 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-52.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-52.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c
|
|
index 58201abe069..2b236e48e19 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-54.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-54.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c
|
|
index 8060b05e781..c914126ece5 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-56.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-56.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c
|
|
index 441af51860e..da4f9740e33 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-58.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-58.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c
|
|
index 3b7477c96ab..121c503c63a 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-60.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-60.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c
|
|
index 472d8d57549..9e8e91b7ae6 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-72.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-72.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
|
|
index 42b2b8d91aa..a3fb5053037 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c
|
|
index 2cdd7032242..88da97f0bb7 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-75.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-75.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
|
|
index 56ee797d10b..fb3e4992782 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-global.c b/gcc/testsuite/gcc.dg/vect/vect-77-global.c
|
|
index f0b73505d68..1580d6e075b 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-77-global.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-77-global.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
|
|
index c3ef8a36591..57e8da0a909 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-global.c b/gcc/testsuite/gcc.dg/vect/vect-78-global.c
|
|
index 241e7fa94b5..ea039b389b2 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-78-global.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-78-global.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
|
|
index decfbee318a..59e1aae0017 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-89.c b/gcc/testsuite/gcc.dg/vect/vect-89.c
|
|
index 051698eada2..356ab96d330 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-89.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-89.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-91.c b/gcc/testsuite/gcc.dg/vect/vect-91.c
|
|
index 9430da3290a..91264d9841d 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-91.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-91.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c
|
|
index b9a1ce23d02..9ceb0fbadcd 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-92.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-92.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-96.c b/gcc/testsuite/gcc.dg/vect/vect-96.c
|
|
index 0cb935b9f16..c0d6c37b21d 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-96.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-96.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
|
|
index c2b1c773047..3887120b747 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
|
|
@@ -15,3 +15,5 @@ fn1 ()
|
|
}
|
|
|
|
/* { dg-final { scan-tree-dump "improved number of alias checks from \[0-9\]* to 1" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
|
|
index 0e6285e4a23..b6cc309dbe8 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
|
|
@@ -65,3 +65,6 @@ main (void)
|
|
FOR_EACH_TYPE (DO_TEST)
|
|
return 0;
|
|
}
|
|
+
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
|
|
index a0d5abc3aa4..09a4ebfa69e 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
|
|
@@ -95,3 +95,6 @@ main (void)
|
|
/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */
|
|
/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */
|
|
/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
|
|
+
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
|
|
index 788cdfc3cdc..63a897f4bad 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
|
|
@@ -95,3 +95,6 @@ main (void)
|
|
/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */
|
|
/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */
|
|
/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
|
|
+
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
|
|
index 60bc4730724..812aa9027dd 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
|
|
@@ -18,4 +18,6 @@ f2 (int *x, long step2, int n)
|
|
|
|
/* { dg-final { scan-tree-dump {need run-time check that [^\n]*step1[^\n]* is nonzero} "vect" } } */
|
|
/* { dg-final { scan-tree-dump-not {need run-time check that [^\n]*step2[^\n]* is nonzero} "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 2 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
|
|
new file mode 100644
|
|
index 00000000000..1d148a04918
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
|
|
@@ -0,0 +1,64 @@
|
|
+#define N 200
|
|
+#define M 4
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
|
|
+ M (sc) M (uc) \
|
|
+ M (ss) M (us) \
|
|
+ M (si) M (ui) \
|
|
+ M (sll) M (ull) \
|
|
+ M (float) M (double)
|
|
+
|
|
+#define TEST_VALUE(I) ((I) * 17 / 2)
|
|
+
|
|
+#define ADD_TEST(TYPE) \
|
|
+ void __attribute__((noinline, noclone)) \
|
|
+ test_##TYPE (TYPE *a, TYPE *b) \
|
|
+ { \
|
|
+ for (int i = 0; i < N; i += 2) \
|
|
+ { \
|
|
+ TYPE b0 = b[i + 0]; \
|
|
+ TYPE b1 = b[i + 1]; \
|
|
+ a[i + 0] = b0 + 2; \
|
|
+ a[i + 1] = b1 + 3; \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+#define DO_TEST(TYPE) \
|
|
+ for (int j = 0; j < M; ++j) \
|
|
+ { \
|
|
+ TYPE a[N + M]; \
|
|
+ for (int i = 0; i < N + M; ++i) \
|
|
+ a[i] = TEST_VALUE (i); \
|
|
+ test_##TYPE (a + j, a); \
|
|
+ for (int i = 0; i < N; i += 2) \
|
|
+ { \
|
|
+ TYPE base1 = j == 0 ? TEST_VALUE (i) : a[i]; \
|
|
+ TYPE base2 = j <= 1 ? TEST_VALUE (i + 1) : a[i + 1]; \
|
|
+ if (a[i + j] != (TYPE) (base1 + 2) \
|
|
+ || a[i + j + 1] != (TYPE) (base2 + 3)) \
|
|
+ __builtin_abort (); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+FOR_EACH_TYPE (ADD_TEST)
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ FOR_EACH_TYPE (DO_TEST)
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
|
|
+/* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
|
|
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
|
|
new file mode 100644
|
|
index 00000000000..fbe3f8431ff
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
|
|
@@ -0,0 +1,61 @@
|
|
+#define N 200
|
|
+#define DIST 32
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
|
|
+ M (sc) M (uc) \
|
|
+ M (ss) M (us) \
|
|
+ M (si) M (ui) \
|
|
+ M (sll) M (ull) \
|
|
+ M (float) M (double)
|
|
+
|
|
+#define ADD_TEST(TYPE) \
|
|
+ void __attribute__((noinline, noclone)) \
|
|
+ test_##TYPE (TYPE *x, TYPE *y) \
|
|
+ { \
|
|
+ for (int i = 0; i < N; ++i) \
|
|
+ { \
|
|
+ x[i] = i; \
|
|
+ y[i] = 42 - i * 2; \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+#define DO_TEST(TYPE) \
|
|
+ for (int i = 0; i < DIST * 2; ++i) \
|
|
+ { \
|
|
+ TYPE a[N + DIST * 2] = {}; \
|
|
+ test_##TYPE (a + DIST, a + i); \
|
|
+ for (int j = 0; j < N + DIST * 2; ++j) \
|
|
+ { \
|
|
+ TYPE expected = 0; \
|
|
+ if (i > DIST && j >= i && j < i + N) \
|
|
+ expected = 42 - (j - i) * 2; \
|
|
+ if (j >= DIST && j < DIST + N) \
|
|
+ expected = j - DIST; \
|
|
+ if (i <= DIST && j >= i && j < i + N) \
|
|
+ expected = 42 - (j - i) * 2; \
|
|
+ if (expected != a[j]) \
|
|
+ __builtin_abort (); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+FOR_EACH_TYPE (ADD_TEST)
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ FOR_EACH_TYPE (DO_TEST)
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
|
|
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
|
|
new file mode 100644
|
|
index 00000000000..81c252dfc23
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
|
|
@@ -0,0 +1,66 @@
|
|
+#define N 200
|
|
+#define DIST 32
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
|
|
+ M (sc) M (uc) \
|
|
+ M (ss) M (us) \
|
|
+ M (si) M (ui) \
|
|
+ M (sll) M (ull) \
|
|
+ M (float) M (double)
|
|
+
|
|
+#define TEST_VALUE(I) ((I) * 13 / 2)
|
|
+
|
|
+#define ADD_TEST(TYPE) \
|
|
+ TYPE __attribute__((noinline, noclone)) \
|
|
+ test_##TYPE (TYPE *x, TYPE *y) \
|
|
+ { \
|
|
+ TYPE res = 0; \
|
|
+ for (int i = 0; i < N; ++i) \
|
|
+ { \
|
|
+ x[i] = i; \
|
|
+ res += y[i]; \
|
|
+ } \
|
|
+ return res; \
|
|
+ }
|
|
+
|
|
+#define DO_TEST(TYPE) \
|
|
+ for (int i = 0; i < DIST * 2; ++i) \
|
|
+ { \
|
|
+ TYPE a[N + DIST * 2]; \
|
|
+ for (int j = 0; j < N + DIST * 2; ++j) \
|
|
+ a[j] = TEST_VALUE (j); \
|
|
+ TYPE res = test_##TYPE (a + DIST, a + i); \
|
|
+ for (int j = 0; j < N; ++j) \
|
|
+ if (a[j + DIST] != (TYPE) j) \
|
|
+ __builtin_abort (); \
|
|
+ TYPE expected_res = 0; \
|
|
+ for (int j = i; j < i + N; ++j) \
|
|
+ if (i <= DIST && j >= DIST && j < DIST + N) \
|
|
+ expected_res += j - DIST; \
|
|
+ else \
|
|
+ expected_res += TEST_VALUE (j); \
|
|
+ if (expected_res != res) \
|
|
+ __builtin_abort (); \
|
|
+ }
|
|
+
|
|
+FOR_EACH_TYPE (ADD_TEST)
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ FOR_EACH_TYPE (DO_TEST)
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
|
|
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c
|
|
new file mode 100644
|
|
index 00000000000..c49c497c2d0
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c
|
|
@@ -0,0 +1,15 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-require-effective-target vect_load_lanes } */
|
|
+
|
|
+struct s { int x[100]; };
|
|
+
|
|
+void
|
|
+f (struct s *s1, int a, int b)
|
|
+{
|
|
+ for (int i = 0; i < 32; ++i)
|
|
+ s1->x[a + i] = s1->x[b + i * 2] + s1->x[b + i * 3];
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *[^\n]*MIXED_STEPS} "vect" } } */
|
|
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
|
|
new file mode 100644
|
|
index 00000000000..9d0739151d9
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
|
|
@@ -0,0 +1,64 @@
|
|
+#define N 200
|
|
+#define DIST 32
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
|
|
+ M (sc) M (uc) \
|
|
+ M (ss) M (us) \
|
|
+ M (si) M (ui) \
|
|
+ M (sll) M (ull) \
|
|
+ M (float) M (double)
|
|
+
|
|
+#define TEST_VALUE(I) ((I) * 11 / 2)
|
|
+
|
|
+#define ADD_TEST(TYPE) \
|
|
+ TYPE a_##TYPE[N * 2]; \
|
|
+ void __attribute__((noinline, noclone)) \
|
|
+ test_##TYPE (int x, int y) \
|
|
+ { \
|
|
+ for (int i = 0; i < N; ++i) \
|
|
+ a_##TYPE[x - i] += a_##TYPE[y - i]; \
|
|
+ }
|
|
+
|
|
+#define DO_TEST(TYPE) \
|
|
+ for (int i = 0; i < DIST * 2; ++i) \
|
|
+ { \
|
|
+ for (int j = 0; j < N + DIST * 2; ++j) \
|
|
+ a_##TYPE[j] = TEST_VALUE (j); \
|
|
+ test_##TYPE (i + N - 1, DIST + N - 1); \
|
|
+ for (int j = 0; j < N + DIST * 2; ++j) \
|
|
+ { \
|
|
+ TYPE expected; \
|
|
+ if (j < i || j >= i + N) \
|
|
+ expected = TEST_VALUE (j); \
|
|
+ else if (i >= DIST) \
|
|
+ expected = ((TYPE) TEST_VALUE (j) \
|
|
+ + (TYPE) TEST_VALUE (j + DIST - i)); \
|
|
+ else \
|
|
+ expected = ((TYPE) TEST_VALUE (j) \
|
|
+ + a_##TYPE[j + DIST - i]); \
|
|
+ if (expected != a_##TYPE[j]) \
|
|
+ __builtin_abort (); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+FOR_EACH_TYPE (ADD_TEST)
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ FOR_EACH_TYPE (DO_TEST)
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
|
|
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
|
|
new file mode 100644
|
|
index 00000000000..7c0ff36a8c4
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
|
|
@@ -0,0 +1,62 @@
|
|
+#define N 200
|
|
+#define DIST 32
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
|
|
+ M (sc) M (uc) \
|
|
+ M (ss) M (us) \
|
|
+ M (si) M (ui) \
|
|
+ M (sll) M (ull) \
|
|
+ M (float) M (double)
|
|
+
|
|
+#define ADD_TEST(TYPE) \
|
|
+ TYPE a_##TYPE[N * 2]; \
|
|
+ void __attribute__((noinline, noclone)) \
|
|
+ test_##TYPE (int x, int y) \
|
|
+ { \
|
|
+ for (int i = 0; i < N; ++i) \
|
|
+ { \
|
|
+ a_##TYPE[i + x] = i; \
|
|
+ a_##TYPE[i + y] = 42 - i * 2; \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+#define DO_TEST(TYPE) \
|
|
+ for (int i = 0; i < DIST * 2; ++i) \
|
|
+ { \
|
|
+ __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE)); \
|
|
+ test_##TYPE (DIST, i); \
|
|
+ for (int j = 0; j < N + DIST * 2; ++j) \
|
|
+ { \
|
|
+ TYPE expected = 0; \
|
|
+ if (i > DIST && j >= i && j < i + N) \
|
|
+ expected = 42 - (j - i) * 2; \
|
|
+ if (j >= DIST && j < DIST + N) \
|
|
+ expected = j - DIST; \
|
|
+ if (i <= DIST && j >= i && j < i + N) \
|
|
+ expected = 42 - (j - i) * 2; \
|
|
+ if (expected != a_##TYPE[j]) \
|
|
+ __builtin_abort (); \
|
|
+ } \
|
|
+ }
|
|
+
|
|
+FOR_EACH_TYPE (ADD_TEST)
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ FOR_EACH_TYPE (DO_TEST)
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
|
|
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
|
|
new file mode 100644
|
|
index 00000000000..8a699ebfda8
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
|
|
@@ -0,0 +1,66 @@
|
|
+#define N 200
|
|
+#define DIST 32
|
|
+
|
|
+typedef signed char sc;
|
|
+typedef unsigned char uc;
|
|
+typedef signed short ss;
|
|
+typedef unsigned short us;
|
|
+typedef int si;
|
|
+typedef unsigned int ui;
|
|
+typedef signed long long sll;
|
|
+typedef unsigned long long ull;
|
|
+
|
|
+#define FOR_EACH_TYPE(M) \
+ M (sc) M (uc) \
+ M (ss) M (us) \
+ M (si) M (ui) \
+ M (sll) M (ull) \
+ M (float) M (double)
+
+#define TEST_VALUE(I) ((I) * 11 / 2)
+
+#define ADD_TEST(TYPE) \
+ TYPE a_##TYPE[N * 2]; \
+ TYPE __attribute__((noinline, noclone)) \
+ test_##TYPE (int x, int y) \
+ { \
+ TYPE res = 0; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a_##TYPE[i + x] = i; \
+ res += a_##TYPE[i + y]; \
+ } \
+ return res; \
+ }
+
+#define DO_TEST(TYPE) \
+ for (int i = 0; i < DIST * 2; ++i) \
+ { \
+ for (int j = 0; j < N + DIST * 2; ++j) \
+ a_##TYPE[j] = TEST_VALUE (j); \
+ TYPE res = test_##TYPE (DIST, i); \
+ for (int j = 0; j < N; ++j) \
+ if (a_##TYPE[j + DIST] != (TYPE) j) \
+ __builtin_abort (); \
+ TYPE expected_res = 0; \
+ for (int j = i; j < i + N; ++j) \
+ if (i <= DIST && j >= DIST && j < DIST + N) \
+ expected_res += j - DIST; \
+ else \
+ expected_res += TEST_VALUE (j); \
+ if (expected_res != res) \
+ __builtin_abort (); \
+ }
+
+FOR_EACH_TYPE (ADD_TEST)
+
+int
+main (void)
+{
+ FOR_EACH_TYPE (DO_TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
index 0569ca487b5..7e5df138999 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
@@ -58,3 +58,7 @@ main (void)
FOR_EACH_TYPE (DO_TEST)
return 0;
}
+
+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
index 5685bfee576..a7fc1fcebbb 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
@@ -17,7 +17,7 @@ typedef unsigned long long ull;
M (sll) M (ull) \
M (float) M (double)
 
-#define TEST_VALUE(I) ((I) * 5 / 2)
+#define TEST_VALUE(I) ((I) * 17 / 2)
 
#define ADD_TEST(TYPE) \
void __attribute__((noinline, noclone)) \
@@ -51,3 +51,7 @@ main (void)
FOR_EACH_TYPE (DO_TEST)
return 0;
}
+
+/* { dg-final { scan-tree-dump {flags: [^\n]*ARBITRARY\n} "vect" { target vect_int } } } */
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
|
|
index 3c98b07e425..d29b352b832 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
|
|
@@ -1,4 +1,4 @@
|
|
-/* { dg-require-effective-target vect_bswap } */
|
|
+/* { dg-additional-options "-msse4" { target sse4_runtime } } */
|
|
|
|
#include "tree-vect.h"
|
|
|
|
@@ -39,4 +39,4 @@ main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c
|
|
new file mode 100644
|
|
index 00000000000..730dc4e8352
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c
|
|
@@ -0,0 +1,5 @@
|
|
+/* { dg-additional-options "-msse2 -mno-sse3" { target sse2_runtime } } */
|
|
+
|
|
+#include "vect-bswap16.c"
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_shift } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
|
|
new file mode 100644
|
|
index 00000000000..bb99b95eca5
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
|
|
@@ -0,0 +1,47 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
+/* { dg-require-effective-target vect_condition } */
|
|
+/* { dg-require-effective-target vect_float } */
|
|
+
|
|
+#include "tree-vect.h"
|
|
+
|
|
+extern void abort (void) __attribute__ ((noreturn));
|
|
+
|
|
+#define N 27
|
|
+
|
|
+/* Condition reduction with different types. */
|
|
+
|
|
+int
|
|
+condition_reduction (float *a, float min_v)
|
|
+{
|
|
+ int last = 0;
|
|
+
|
|
+ for (int i = 0; i < N; i++)
|
|
+ if (a[i] < min_v)
|
|
+ last = i;
|
|
+
|
|
+ return last;
|
|
+}
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ float a[N] = {
|
|
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
|
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
|
+ 21, 22, 23, 24, 25, 26, 27
|
|
+ };
|
|
+
|
|
+ check_vect ();
|
|
+
|
|
+ int ret = condition_reduction (a, 10);
|
|
+ if (ret != 18)
|
|
+ abort ();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
|
|
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
|
|
new file mode 100644
|
|
index 00000000000..8820075b1dc
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
|
|
@@ -0,0 +1,47 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
+/* { dg-require-effective-target vect_condition } */
|
|
+/* { dg-require-effective-target vect_double } */
|
|
+
|
|
+#include "tree-vect.h"
|
|
+
|
|
+extern void abort (void) __attribute__ ((noreturn));
|
|
+
|
|
+#define N 27
|
|
+
|
|
+/* Condition reduction with different types. */
|
|
+
|
|
+int
|
|
+condition_reduction (double *a, double min_v)
|
|
+{
|
|
+ int last = 0;
|
|
+
|
|
+ for (int i = 0; i < N; i++)
|
|
+ if (a[i] < min_v)
|
|
+ last = i;
|
|
+
|
|
+ return last;
|
|
+}
|
|
+
|
|
+int
|
|
+main (void)
|
|
+{
|
|
+ double a[N] = {
|
|
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
|
|
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
|
+ 21, 22, 23, 24, 25, 26, 27
|
|
+ };
|
|
+
|
|
+ check_vect ();
|
|
+
|
|
+ int ret = condition_reduction (a, 10);
|
|
+ if (ret != 18)
|
|
+ abort ();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
|
|
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
|
|
+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
|
|
index 0ba33895592..079704cee81 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
|
|
@@ -52,5 +52,5 @@ int main ()
|
|
|
|
/* Vectorization of loops with multiple types and double reduction is not
|
|
supported yet. */
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
|
|
new file mode 100644
|
|
index 00000000000..946666e918f
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-do compile } */
|
|
+
|
|
+/* Copied from PR 88915. */
|
|
+void pixel_avg( unsigned char *dst, int i_dst_stride,
|
|
+ unsigned char *src1, int i_src1_stride,
|
|
+ unsigned char *src2, int i_src2_stride,
|
|
+ int i_width, int i_height )
|
|
+ {
|
|
+ for( int y = 0; y < i_height; y++ )
|
|
+ {
|
|
+ for( int x = 0; x < i_width; x++ )
|
|
+ dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
|
|
+ dst += i_dst_stride;
|
|
+ src1 += i_src1_stride;
|
|
+ src2 += i_src2_stride;
|
|
+ }
|
|
+ }
|
|
+
|
|
+/* { dg-final { scan-tree-dump "LOOP EPILOGUE VECTORIZED" "vect" { xfail { arm*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-1.c
|
|
index e170875d7ab..f628c5d3998 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-tree-scev-cprop" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-2.c
|
|
index a6daa61829e..19d8c22859e 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-2.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-2.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_long } */
|
|
/* { dg-require-effective-target vect_shift } */
|
|
/* { dg-additional-options "-fno-tree-scev-cprop" } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-3.c
|
|
index 3ffa5166f45..8f5ccb27365 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-3.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-3.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include "tree-vect.h"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-4.c b/gcc/testsuite/gcc.dg/vect/vect-live-4.c
|
|
index 21cc27320ac..553ffcd49f7 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-4.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-4.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include "tree-vect.h"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
|
|
index aff37c100f0..965437c8f03 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-tree-scev-cprop" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
|
|
index 35689665b54..0d2f17f9003 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-tree-scev-cprop" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
|
|
index 854116fa36e..a3f60f6ce6d 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_long } */
|
|
/* { dg-additional-options "-fno-tree-scev-cprop" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
|
|
index 18bf5e80917..1f82121df06 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
|
|
index 43887865bf4..b0f74083f2b 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
|
|
index b47a93ab326..864b17ac640 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options double_vectors } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
|
|
index 13238dbe2f9..e65a092f5bf 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
|
|
@@ -23,4 +23,4 @@ foo (){
|
|
return;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
|
|
index d1fbe346a48..a88014a2fbf 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
|
|
@@ -65,4 +65,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
|
|
index d1fbe346a48..a88014a2fbf 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
|
|
@@ -65,4 +65,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
|
|
index d1fbe346a48..a88014a2fbf 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
|
|
@@ -65,4 +65,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
|
|
index d1fbe346a48..4f95c652ee3 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
|
|
@@ -65,4 +65,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } }*/
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c
|
|
new file mode 100644
|
|
index 00000000000..f26d4220532
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c
|
|
@@ -0,0 +1,22 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-require-effective-target vect_float } */
|
|
+/* { dg-additional-options "-fno-math-errno" } */
|
|
+
|
|
+void
|
|
+foo (float * __restrict x, float *y, int n, int m)
|
|
+{
|
|
+ if (m > 0)
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ float tem = x[i], tem1;
|
|
+ for (int j = 0; j < m; ++j)
|
|
+ {
|
|
+ tem += y[j];
|
|
+ tem1 = tem;
|
|
+ tem = __builtin_sqrtf (tem);
|
|
+ }
|
|
+ x[i] = tem - tem1;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_call_sqrtf } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c
|
|
new file mode 100644
|
|
index 00000000000..cc23c6b0866
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c
|
|
@@ -0,0 +1,3 @@
|
|
+/* { dg-require-effective-target vect_int } */
|
|
+
|
|
+#include "vect-peel-1-src.c"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c
|
|
new file mode 100644
|
|
index 00000000000..7980d4dd643
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c
|
|
@@ -0,0 +1,48 @@
|
|
+#include <stdarg.h>
|
|
+#include "tree-vect.h"
|
|
+
|
|
+#define N 128
|
|
+
|
|
+int ib[N+7];
|
|
+
|
|
+__attribute__ ((noinline))
|
|
+int main1 ()
|
|
+{
|
|
+ int i;
|
|
+ int ia[N+1];
|
|
+
|
|
+ /* All the accesses are misaligned. With cost model disabled, we
|
|
+ count the number of aligned accesses for each peeling option, and
|
|
+ in this case we align the two loads if possible (i.e., if
|
|
+ misaligned stores are supported). */
|
|
+ for (i = 1; i <= N; i++)
|
|
+ {
|
|
+ ia[i] = ib[i+2] + ib[i+6];
|
|
+ }
|
|
+
|
|
+ /* check results: */
|
|
+ for (i = 1; i <= N; i++)
|
|
+ {
|
|
+ if (ia[i] != ib[i+2] + ib[i+6])
|
|
+ abort ();
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int main (void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ check_vect ();
|
|
+
|
|
+ for (i = 0; i <= N+6; i++)
|
|
+ {
|
|
+ asm volatile ("" : "+r" (i));
|
|
+ ib[i] = i;
|
|
+ }
|
|
+
|
|
+ return main1 ();
|
|
+}
|
|
+
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
|
|
index fae99ab0b08..a7660a381c4 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
|
|
@@ -1,51 +1,8 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
-#include <stdarg.h>
|
|
-#include "tree-vect.h"
|
|
-
|
|
-#define N 128
|
|
-
|
|
-int ib[N+7];
|
|
-
|
|
-__attribute__ ((noinline))
|
|
-int main1 ()
|
|
-{
|
|
- int i;
|
|
- int ia[N+1];
|
|
-
|
|
- /* All the accesses are misaligned. With cost model disabled, we
|
|
- count the number of aligned accesses for each peeling option, and
|
|
- in this case we align the two loads if possible (i.e., if
|
|
- misaligned stores are supported). */
|
|
- for (i = 1; i <= N; i++)
|
|
- {
|
|
- ia[i] = ib[i+2] + ib[i+6];
|
|
- }
|
|
-
|
|
- /* check results: */
|
|
- for (i = 1; i <= N; i++)
|
|
- {
|
|
- if (ia[i] != ib[i+2] + ib[i+6])
|
|
- abort ();
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-int main (void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- check_vect ();
|
|
-
|
|
- for (i = 0; i <= N+6; i++)
|
|
- {
|
|
- asm volatile ("" : "+r" (i));
|
|
- ib[i] = i;
|
|
- }
|
|
-
|
|
- return main1 ();
|
|
-}
|
|
+#include "vect-peel-1-src.c"
|
|
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } xfail { ! vect_unaligned_possible } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c
|
|
new file mode 100644
|
|
index 00000000000..8af0fcdca0e
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c
|
|
@@ -0,0 +1,4 @@
|
|
+/* { dg-require-effective-target vect_int } */
|
|
+/* { dg-add-options bind_pic_locally } */
|
|
+
|
|
+#include "vect-peel-3-src.c"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c
|
|
new file mode 100644
|
|
index 00000000000..a21ce8c3d6a
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c
|
|
@@ -0,0 +1,58 @@
|
|
+#include <stdarg.h>
|
|
+#include "tree-vect.h"
|
|
+
|
|
+#if VECTOR_BITS > 128
|
|
+#define NINTS (VECTOR_BITS / 32)
|
|
+#define EXTRA (NINTS * 2)
|
|
+#else
|
|
+#define NINTS 4
|
|
+#define EXTRA 10
|
|
+#endif
|
|
+
|
|
+#define N 128
|
|
+
|
|
+#define RES_A (N * N / 4)
|
|
+#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1))
|
|
+#define RES_C (N * (N + 1) / 2 + (N + 1))
|
|
+#define RES (RES_A + RES_B + RES_C)
|
|
+
|
|
+int ib[N + EXTRA];
|
|
+int ia[N + EXTRA];
|
|
+int ic[N + EXTRA];
|
|
+
|
|
+__attribute__ ((noinline))
|
|
+int main1 ()
|
|
+{
|
|
+ int i, suma = 0, sumb = 0, sumc = 0;
|
|
+
|
|
+ /* ib and ic have same misalignment, we peel to align them. */
|
|
+ for (i = 0; i <= N; i++)
|
|
+ {
|
|
+ suma += ia[i];
|
|
+ sumb += ib[i + NINTS + 1];
|
|
+ sumc += ic[i + 1];
|
|
+ }
|
|
+
|
|
+ /* check results: */
|
|
+ if (suma + sumb + sumc != RES)
|
|
+ abort ();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int main (void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ check_vect ();
|
|
+
|
|
+ for (i = 0; i < N + EXTRA; i++)
|
|
+ {
|
|
+ asm volatile ("" : "+r" (i));
|
|
+ ib[i] = i;
|
|
+ ic[i] = i+2;
|
|
+ ia[i] = i/2;
|
|
+ }
|
|
+
|
|
+ return main1 ();
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
|
|
index d5c0cf10ce1..2cd99573fd1 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
|
|
@@ -1,64 +1,9 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
-#include <stdarg.h>
|
|
-#include "tree-vect.h"
|
|
-
|
|
-#if VECTOR_BITS > 128
|
|
-#define NINTS (VECTOR_BITS / 32)
|
|
-#define EXTRA (NINTS * 2)
|
|
-#else
|
|
-#define NINTS 4
|
|
-#define EXTRA 10
|
|
-#endif
|
|
-
|
|
-#define N 128
|
|
-
|
|
-#define RES_A (N * N / 4)
|
|
-#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1))
|
|
-#define RES_C (N * (N + 1) / 2 + (N + 1))
|
|
-#define RES (RES_A + RES_B + RES_C)
|
|
-
|
|
-int ib[N + EXTRA];
|
|
-int ia[N + EXTRA];
|
|
-int ic[N + EXTRA];
|
|
-
|
|
-__attribute__ ((noinline))
|
|
-int main1 ()
|
|
-{
|
|
- int i, suma = 0, sumb = 0, sumc = 0;
|
|
-
|
|
- /* ib and ic have same misalignment, we peel to align them. */
|
|
- for (i = 0; i <= N; i++)
|
|
- {
|
|
- suma += ia[i];
|
|
- sumb += ib[i + NINTS + 1];
|
|
- sumc += ic[i + 1];
|
|
- }
|
|
-
|
|
- /* check results: */
|
|
- if (suma + sumb + sumc != RES)
|
|
- abort ();
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-int main (void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- check_vect ();
|
|
-
|
|
- for (i = 0; i < N + EXTRA; i++)
|
|
- {
|
|
- asm volatile ("" : "+r" (i));
|
|
- ib[i] = i;
|
|
- ic[i] = i+2;
|
|
- ia[i] = i/2;
|
|
- }
|
|
-
|
|
- return main1 ();
|
|
-}
|
|
+#include "vect-peel-3-src.c"
|
|
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
|
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { { ! vect_unaligned_possible } || vect_sizes_32B_16B } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c
|
|
new file mode 100644
|
|
index 00000000000..783982f04f6
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c
|
|
@@ -0,0 +1,4 @@
|
|
+/* { dg-require-effective-target vect_int } */
|
|
+/* { dg-add-options bind_pic_locally } */
|
|
+
|
|
+#include "vect-peel-4-src.c"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c
|
|
new file mode 100644
|
|
index 00000000000..33088fb0902
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c
|
|
@@ -0,0 +1,45 @@
|
|
+#include <stdarg.h>
|
|
+#include "tree-vect.h"
|
|
+
|
|
+#define N 128
|
|
+
|
|
+int ib[N+7];
|
|
+int ia[N+1];
|
|
+
|
|
+__attribute__ ((noinline))
|
|
+int main1 ()
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ /* Don't peel keeping one load and the store aligned. */
|
|
+ for (i = 0; i <= N; i++)
|
|
+ {
|
|
+ ia[i] = ib[i] + ib[i+5];
|
|
+ }
|
|
+
|
|
+ /* check results: */
|
|
+ for (i = 1; i <= N; i++)
|
|
+ {
|
|
+ if (ia[i] != ib[i] + ib[i+5])
|
|
+ abort ();
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int main (void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ check_vect ();
|
|
+
|
|
+ for (i = 0; i <= N+6; i++)
|
|
+ {
|
|
+ asm volatile ("" : "+r" (i));
|
|
+ ib[i] = i;
|
|
+ }
|
|
+
|
|
+ return main1 ();
|
|
+}
|
|
+
|
|
+
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
|
|
index 88f9f0ddcba..3b5272f284f 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
|
|
@@ -1,49 +1,9 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-add-options bind_pic_locally } */
|
|
|
|
-#include <stdarg.h>
|
|
-#include "tree-vect.h"
|
|
-
|
|
-#define N 128
|
|
-
|
|
-int ib[N+7];
|
|
-int ia[N+1];
|
|
-
|
|
-__attribute__ ((noinline))
|
|
-int main1 ()
|
|
-{
|
|
- int i;
|
|
-
|
|
- /* Don't peel keeping one load and the store aligned. */
|
|
- for (i = 0; i <= N; i++)
|
|
- {
|
|
- ia[i] = ib[i] + ib[i+5];
|
|
- }
|
|
-
|
|
- /* check results: */
|
|
- for (i = 1; i <= N; i++)
|
|
- {
|
|
- if (ia[i] != ib[i] + ib[i+5])
|
|
- abort ();
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-int main (void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- check_vect ();
|
|
-
|
|
- for (i = 0; i <= N+6; i++)
|
|
- {
|
|
- asm volatile ("" : "+r" (i));
|
|
- ib[i] = i;
|
|
- }
|
|
-
|
|
- return main1 ();
|
|
-}
|
|
+#include "vect-peel-4-src.c"
|
|
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
|
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
|
|
index e246ae7f3c6..c40f8625b84 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
|
|
@@ -62,4 +62,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
|
|
index 5f0551ee372..dd3045502f1 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
|
|
@@ -46,4 +46,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
|
|
index 02c2bee8612..1a2d8d04f4e 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
|
|
@@ -45,4 +45,4 @@ int main (void)
|
|
return 0;
|
|
}
|
|
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
|
|
index ad148046a8e..cc0d9694a4f 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_float_strict } */
|
|
/* { dg-additional-options "-fno-fast-math" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
|
|
index 171451872e5..ffbc9706901 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
|
|
index ac674749b6f..05e343ad782 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
|
|
/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
|
|
index b036ad5b0b4..e0f47d8a4f2 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
@@ -12,12 +14,6 @@ signed char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
|
|
|
/* char->short->short dot product.
|
|
The dot-product pattern should be detected.
|
|
- The reduction is currently not vectorized becaus of the signed->unsigned->signed
|
|
- casts, since this patch:
|
|
-
|
|
- 2005-12-26 Kazu Hirata <kazu@codesourcery.com>
|
|
-
|
|
- PR tree-optimization/25125
|
|
|
|
When the dot-product is detected, the loop should be vectorized on vect_sdot_qi
|
|
targets (targets that support dot-product of signed char).
|
|
@@ -60,5 +56,5 @@ int main (void)
|
|
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
|
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
|
|
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
|
|
index 57e18040cf2..0fc112012cf 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
|
|
index d020f643bb8..e23ebd9b072 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
|
|
/* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
|
|
index 3155d97b3cd..288be13440d 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
|
|
new file mode 100644
|
|
index 00000000000..dc5704f5607
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
|
|
@@ -0,0 +1,45 @@
|
|
+/* { dg-options "-O3 -fno-vect-cost-model" } */
|
|
+struct {
|
|
+ float real;
|
|
+ float img;
|
|
+} g[11];
|
|
+
|
|
+float __attribute__ ((noclone))
|
|
+foo_11 (void)
|
|
+{
|
|
+ float sum = 0.0;
|
|
+ for (int i = 0; i < 11; ++i)
|
|
+ sum += g[i].real;
|
|
+ return sum;
|
|
+}
|
|
+
|
|
+float __attribute__ ((noclone))
|
|
+foo_10 (void)
|
|
+{
|
|
+ float sum = 0.0;
|
|
+ for (int i = 0; i < 10; ++i)
|
|
+ sum += g[i].real;
|
|
+ return sum;
|
|
+}
|
|
+
|
|
+int main (void)
|
|
+{
|
|
+ float check_10 = 0.0;
|
|
+ float check_11 = 0.0;
|
|
+ for (int i = 0; i < 11; ++i)
|
|
+ {
|
|
+ asm volatile ("" : : : "memory");
|
|
+ g[i].real = (float) i;
|
|
+ g[i].img = (float) -i;
|
|
+ if (i < 10)
|
|
+ check_10 += (float) i;
|
|
+ check_11 += (float) i;
|
|
+ }
|
|
+
|
|
+ if (foo_10 () != check_10)
|
|
+ __builtin_abort ();
|
|
+ if (foo_11 () != check_11)
|
|
+ __builtin_abort ();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
|
|
index b06b234072b..1ddbe96ebc3 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
|
|
index be03c7d011d..7ae2c838344 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
|
|
index c30c85ce911..91ce0ef934e 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
|
|
index a98edd3045a..2190eaa6242 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
|
|
index 570e56a8c9b..6ad645b3bdd 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
|
|
index 8190622d5d7..71df5741e16 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
|
|
@@ -21,6 +21,8 @@ foo ()
|
|
2005-12-26 Kazu Hirata <kazu@codesourcery.com>
|
|
|
|
PR tree-optimization/25125
|
|
+
|
|
+ but we still handle the reduction.
|
|
*/
|
|
|
|
for (i = 0; i < N; i++)
|
|
@@ -43,5 +45,4 @@ main (void)
|
|
}
|
|
|
|
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_qi_to_hi } } } } */
|
|
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
|
|
index a033a7d27d1..2f0bb692564 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_usad_char } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
|
|
index b912a3431f7..e5bbeaede09 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
|
|
@@ -106,4 +106,4 @@ main (int argc, const char **argv)
|
|
}
|
|
|
|
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */
|
|
-/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
|
|
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(MODE=V16QI\\)" 2 "vect" { target avx2_runtime } } } */
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
|
index 89f983cad06..4c95dd20179 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
|
index e319699cd92..4075f815cea 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
|
index ee0538c0635..c4ac88e186d 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
|
index 6d74c693316..ebbf4f5e841 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include "tree-vect.h"
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
|
index 942f63d6f31..2e28baae0b8 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
|
index 98f78d3b37a..d277f0b2b94 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
|
|
index 176f183f3ce..6fc7a282351 100644
|
|
--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
|
|
+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-require-effective-target vect_int } */
|
|
|
|
#include <stdarg.h>
|
|
diff --git a/gcc/testsuite/gcc.dg/vshift-5.c b/gcc/testsuite/gcc.dg/vshift-5.c
|
|
index daa5f1c5cd8..62e6328cb28 100644
|
|
--- a/gcc/testsuite/gcc.dg/vshift-5.c
|
|
+++ b/gcc/testsuite/gcc.dg/vshift-5.c
|
|
@@ -40,6 +40,42 @@ f2 (void)
|
|
a[3] = a3;
|
|
}
|
|
|
|
+__attribute__((noinline, noclone)) void
|
|
+f2a (int x)
|
|
+{
|
|
+ long long a0, a1, a2, a3;
|
|
+ a0 = a[0];
|
|
+ a1 = a[1];
|
|
+ a2 = a[2];
|
|
+ a3 = a[3];
|
|
+ a0 = a0 << x;
|
|
+ a1 = a1 << 2;
|
|
+ a2 = a2 << 2;
|
|
+ a3 = a3 << 2;
|
|
+ a[0] = a0;
|
|
+ a[1] = a1;
|
|
+ a[2] = a2;
|
|
+ a[3] = a3;
|
|
+}
|
|
+
|
|
+__attribute__((noinline, noclone)) void
|
|
+f2b (int x)
|
|
+{
|
|
+ long long a0, a1, a2, a3;
|
|
+ a0 = a[0];
|
|
+ a1 = a[1];
|
|
+ a2 = a[2];
|
|
+ a3 = a[3];
|
|
+ a0 = a0 << 2;
|
|
+ a1 = a1 << 2;
|
|
+ a2 = a2 << x;
|
|
+ a3 = a3 << 2;
|
|
+ a[0] = a0;
|
|
+ a[1] = a1;
|
|
+ a[2] = a2;
|
|
+ a[3] = a3;
|
|
+}
|
|
+
|
|
__attribute__((noinline, noclone)) void
|
|
f3 (int x)
|
|
{
|
|
@@ -77,5 +113,13 @@ main ()
|
|
if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
|
|
|| a[2] != (2LL << 9) || a[3] != (1LL << 10))
|
|
abort ();
|
|
+ f2a (3);
|
|
+ if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
|
|
+ || a[2] != (2LL << 11) || a[3] != (1LL << 12))
|
|
+ abort ();
|
|
+ f2b (3);
|
|
+ if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
|
|
+ || a[2] != (2LL << 14) || a[3] != (1LL << 14))
|
|
+ abort ();
|
|
return 0;
|
|
}
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c
|
|
new file mode 100644
|
|
index 00000000000..8ff66714e9b
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c
|
|
@@ -0,0 +1,39 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+#define DEF_DOT(TYPE1, TYPE2) \
|
|
+TYPE1 __attribute__ ((noinline, noclone)) \
|
|
+dot_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n) \
|
|
+{ \
|
|
+ TYPE1 sum = 0; \
|
|
+ for (int i = 0; i < n; i++) \
|
|
+ { \
|
|
+ sum += x[i] * y[i]; \
|
|
+ } \
|
|
+ return sum; \
|
|
+}
|
|
+
|
|
+DEF_DOT(uint32_t, uint8_t)
|
|
+DEF_DOT(int32_t, int8_t)
|
|
+DEF_DOT(int64_t, int16_t)
|
|
+
|
|
+/* The uint16_t->uint64_t dot product requires a casting to satisfy the C
|
|
+ language rules. */
|
|
+uint64_t __attribute__ ((noinline, noclone))
|
|
+dot_uint64_t_uint16_t (uint16_t *restrict x, uint16_t *restrict y, int n)
|
|
+{
|
|
+ uint64_t sum = 0;
|
|
+ for (int i = 0; i < n; i++)
|
|
+ {
|
|
+ sum += (unsigned int)x[i] * y[i];
|
|
+ }
|
|
+ return sum;
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\twhilelo\t} 8 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
|
|
index 5c04bcdb3f5..51925fa8f50 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
|
|
@@ -17,3 +17,4 @@ f (double *restrict a, double *restrict b, double *restrict c,
|
|
|
|
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
|
/* { dg-final { scan-assembler-not {\tfmad\t} } } */
|
|
+/* { dg-final { scan-assembler-times {\tst1d} 2 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
|
|
new file mode 100644
|
|
index 00000000000..78c70b2be32
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
|
|
@@ -0,0 +1,90 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+#define MASK_SLP_2(TYPE_COND, ALT_VAL) \
|
|
+void __attribute__ ((noinline, noclone)) \
|
|
+mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y, \
|
|
+ TYPE_COND *restrict z, int n) \
|
|
+{ \
|
|
+ for (int i = 0; i < n; i += 2) \
|
|
+ { \
|
|
+ x[i] = y[i] ? z[i] : 1; \
|
|
+ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
|
|
+ } \
|
|
+}
|
|
+
|
|
+#define MASK_SLP_4(TYPE_COND, ALT_VAL) \
|
|
+void __attribute__ ((noinline, noclone)) \
|
|
+mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y, \
|
|
+ TYPE_COND *restrict z, int n) \
|
|
+{ \
|
|
+ for (int i = 0; i < n; i += 4) \
|
|
+ { \
|
|
+ x[i] = y[i] ? z[i] : 1; \
|
|
+ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
|
|
+ x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
|
|
+ x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
|
|
+ } \
|
|
+}
|
|
+
|
|
+#define MASK_SLP_8(TYPE_COND, ALT_VAL) \
|
|
+void __attribute__ ((noinline, noclone)) \
|
|
+mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y, \
|
|
+ TYPE_COND *restrict z, int n) \
|
|
+{ \
|
|
+ for (int i = 0; i < n; i += 8) \
|
|
+ { \
|
|
+ x[i] = y[i] ? z[i] : 1; \
|
|
+ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
|
|
+ x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
|
|
+ x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
|
|
+ x[i + 4] = y[i + 4] ? z[i + 4] : 1; \
|
|
+ x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL; \
|
|
+ x[i + 6] = y[i + 6] ? z[i + 6] : 1; \
|
|
+ x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL; \
|
|
+ } \
|
|
+}
|
|
+
|
|
+#define MASK_SLP_FAIL(TYPE_COND) \
|
|
+void __attribute__ ((noinline, noclone)) \
|
|
+mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y, \
|
|
+ TYPE_COND *restrict z, int n) \
|
|
+{ \
|
|
+ for (int i = 0; i < n; i += 2) \
|
|
+ { \
|
|
+ x[i] = y[i] ? z[i] : 1; \
|
|
+ x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]]; \
|
|
+ } \
|
|
+}
|
|
+
|
|
+MASK_SLP_2(int8_t, 1)
|
|
+MASK_SLP_2(int8_t, 2)
|
|
+MASK_SLP_2(int, 1)
|
|
+MASK_SLP_2(int, 2)
|
|
+MASK_SLP_2(int64_t, 1)
|
|
+MASK_SLP_2(int64_t, 2)
|
|
+
|
|
+MASK_SLP_4(int8_t, 1)
|
|
+MASK_SLP_4(int8_t, 2)
|
|
+MASK_SLP_4(int, 1)
|
|
+MASK_SLP_4(int, 2)
|
|
+MASK_SLP_4(int64_t, 1)
|
|
+MASK_SLP_4(int64_t, 2)
|
|
+
|
|
+MASK_SLP_8(int8_t, 1)
|
|
+MASK_SLP_8(int8_t, 2)
|
|
+MASK_SLP_8(int, 1)
|
|
+MASK_SLP_8(int, 2)
|
|
+MASK_SLP_8(int64_t, 1)
|
|
+MASK_SLP_8(int64_t, 2)
|
|
+
|
|
+MASK_SLP_FAIL(int8_t)
|
|
+MASK_SLP_FAIL(int)
|
|
+MASK_SLP_FAIL(int64_t)
|
|
+
|
|
+/* { dg-final { scan-assembler-not {\tld2w\t} } } */
|
|
+/* { dg-final { scan-assembler-not {\tst2w\t} } } */
|
|
+/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
|
|
+/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
|
|
index a258344b0a9..f152d04b473 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
|
|
@@ -105,8 +105,8 @@ reduc_##NAME##_##TYPE (TYPE *a, int n) \
|
|
|
|
TEST_BITWISE (DEF_REDUC_BITWISE)
|
|
|
|
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
|
|
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
|
|
|
@@ -157,8 +157,8 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
|
|
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
|
|
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
|
|
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
|
|
index 376a453fc73..0640cba8e0f 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
|
|
@@ -116,8 +116,8 @@ reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \
|
|
|
|
TEST_BITWISE (DEF_REDUC_BITWISE)
|
|
|
|
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
|
|
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
|
|
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
|
|
index ff535942331..cced4ad488e 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
|
|
@@ -23,16 +23,12 @@ REDUC (uint64_t)
|
|
REDUC (float)
|
|
REDUC (double)
|
|
|
|
-/* XFAILed until we support sub-int reductions for signed types. */
|
|
-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */
|
|
-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 } } */
|
|
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */
|
|
/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */
|
|
/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */
|
|
|
|
-/* XFAILed until we support sub-int reductions for signed types. */
|
|
-/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tsub\t} 8 } } */
|
|
/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
|
|
index 3913b8848c0..dec4c87e54d 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
|
|
@@ -15,6 +15,5 @@ reduc (int *restrict a, int *restrict b, int *restrict c)
|
|
}
|
|
|
|
/* { dg-final { scan-assembler-times {\tcmpne\tp[0-9]+\.s, } 1 } } */
|
|
-/* We ought to use the CMPNE result for the SEL too. */
|
|
-/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } } } */
|
|
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, } 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
|
|
index a718e9d2ebf..83ebec50bc6 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
|
|
@@ -1,10 +1,7 @@
|
|
/* { dg-do compile } */
|
|
-/* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
|
|
-double mat[100][4];
|
|
-double mat2[100][8];
|
|
-double mat3[100][12];
|
|
-double mat4[100][3];
|
|
+double mat[100][2];
|
|
|
|
double
|
|
slp_reduc_plus (int n)
|
|
@@ -14,115 +11,8 @@ slp_reduc_plus (int n)
|
|
{
|
|
tmp = tmp + mat[i][0];
|
|
tmp = tmp + mat[i][1];
- tmp = tmp + mat[i][2];
- tmp = tmp + mat[i][3];
}
return tmp;
}
-double
-slp_reduc_plus2 (int n)
-{
- double tmp = 0.0;
- for (int i = 0; i < n; i++)
- {
- tmp = tmp + mat2[i][0];
- tmp = tmp + mat2[i][1];
- tmp = tmp + mat2[i][2];
- tmp = tmp + mat2[i][3];
- tmp = tmp + mat2[i][4];
- tmp = tmp + mat2[i][5];
- tmp = tmp + mat2[i][6];
- tmp = tmp + mat2[i][7];
- }
- return tmp;
-}
-
-double
-slp_reduc_plus3 (int n)
-{
- double tmp = 0.0;
- for (int i = 0; i < n; i++)
- {
- tmp = tmp + mat3[i][0];
- tmp = tmp + mat3[i][1];
- tmp = tmp + mat3[i][2];
- tmp = tmp + mat3[i][3];
- tmp = tmp + mat3[i][4];
- tmp = tmp + mat3[i][5];
- tmp = tmp + mat3[i][6];
- tmp = tmp + mat3[i][7];
- tmp = tmp + mat3[i][8];
- tmp = tmp + mat3[i][9];
- tmp = tmp + mat3[i][10];
- tmp = tmp + mat3[i][11];
- }
- return tmp;
-}
-
-void
-slp_non_chained_reduc (int n, double * restrict out)
-{
- for (int i = 0; i < 3; i++)
- out[i] = 0;
-
- for (int i = 0; i < n; i++)
- {
- out[0] = out[0] + mat4[i][0];
- out[1] = out[1] + mat4[i][1];
- out[2] = out[2] + mat4[i][2];
- }
-}
-
-/* Strict FP reductions shouldn't be used for the outer loops, only the
- inner loops. */
-
-float
-double_reduc1 (float (*restrict i)[16])
-{
- float l = 0;
-
- for (int a = 0; a < 8; a++)
- for (int b = 0; b < 8; b++)
- l += i[b][a];
- return l;
-}
-
-float
-double_reduc2 (float *restrict i)
-{
- float l = 0;
-
- for (int a = 0; a < 8; a++)
- for (int b = 0; b < 16; b++)
- {
- l += i[b * 4];
- l += i[b * 4 + 1];
- l += i[b * 4 + 2];
- l += i[b * 4 + 3];
- }
- return l;
-}
-
-float
-double_reduc3 (float *restrict i, float *restrict j)
-{
- float k = 0, l = 0;
-
- for (int a = 0; a < 8; a++)
- for (int b = 0; b < 8; b++)
- {
- k += i[b];
- l += j[b];
- }
- return l * k;
-}
-
-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */
-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */
-/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3. Each one
- is reported three times, once for SVE, once for 128-bit AdvSIMD and once
- for 64-bit AdvSIMD. */
-/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */
-/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3. */
-/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */
+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
index 0b2a7ad57e3..37b5f1148a3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
@@ -32,7 +32,6 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \
TEST_ALL (VEC_PERM)
-/* ??? We don't treat the int8_t and int16_t loops as reductions. */
/* ??? We don't treat the uint loops as SLP. */
/* The loop should be fully-masked. */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
@@ -41,15 +40,15 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 2 } } */
-/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
index b75edc69e2d..6a199d00659 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
@@ -33,34 +33,24 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
TEST_ALL (VEC_PERM)
-/* ??? We don't think it's worth using SLP for the 64-bit loops and fall
- back to the less efficient non-SLP implementation instead. */
-/* ??? At present we don't treat the int8_t and int16_t loops as
- reductions. */
-/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-not {\tld2b\t} } } */
/* { dg-final { scan-assembler-not {\tld2h\t} } } */
/* { dg-final { scan-assembler-not {\tld2w\t} } } */
/* { dg-final { scan-assembler-not {\tld2d\t} } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
-/* Should be 4 and 6 respectively, if we used reductions for int8_t and
- int16_t. */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
index 9e6aa8ccbf8..19207207999 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
@@ -31,45 +31,27 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
T (uint16_t) \
T (int32_t) \
T (uint32_t) \
- T (int64_t) \
- T (uint64_t) \
T (_Float16) \
- T (float) \
- T (double)
+ T (float)
TEST_ALL (VEC_PERM)
-/* We can't use SLP for the 64-bit loops, since the number of reduction
- results might be greater than the number of elements in the vector.
- Otherwise we have two loads per loop, one for the initial vector
- and one for the loop body. */
-/* ??? At present we don't treat the int8_t and int16_t loops as
- reductions. */
-/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
+/* We have two loads per loop, one for the initial vector and one for
+ the loop body. */
+/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
-/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */
/* { dg-final { scan-assembler-not {\tld4b\t} } } */
/* { dg-final { scan-assembler-not {\tld4h\t} } } */
/* { dg-final { scan-assembler-not {\tld4w\t} } } */
-/* { dg-final { scan-assembler-not {\tld1d\t} } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */
-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
-/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
-/* Should be 4 and 6 respectively, if we used reductions for int8_t and
- int16_t. */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
index 68baba9e965..40ff2d561a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
@@ -15,12 +15,9 @@ f (TYPE *x, TYPE *y, unsigned short n, l
/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (VF-1)*4 rather than (257-1)*4. */
-/* { dg-final { scan-assembler-not {, 1024} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
-/* Two range checks and a check for n being zero. */
-/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+ an overlap check that multiplies by (257-1)*4. */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* One range check and a check for n being zero. */
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
|
|
index 30f6d2691b8..b8afea70207 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
|
|
@@ -15,7 +15,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
|
|
/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
|
|
-/* Should multiply by (257-1)*4 rather than (VF-1)*4. */
|
|
+/* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4. */
|
|
/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */
|
|
/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */
|
|
/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
|
|
index 70792ff9f33..5ab6859ad4e 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
|
|
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, int n, long m __attribute__((unused)))
|
|
/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
|
|
-/* Should multiply by (VF-1)*4 rather than (257-1)*4. */
|
|
-/* { dg-final { scan-assembler-not {, 1024} } } */
|
|
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
|
|
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
|
|
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
|
|
-/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
|
|
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
|
|
-/* Two range checks and a check for n being zero. */
|
|
-/* { dg-final { scan-assembler {\tcmp\t} } } */
|
|
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
|
|
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
|
|
+ an overlap check that multiplies by (257-1)*4. */
|
|
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
|
|
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
|
|
+/* One range check and a check for n being zero. */
|
|
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
|
|
index 688f3be61d7..93c114193e9 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
|
|
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
|
|
/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
|
|
/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
|
|
-/* Should multiply by (VF-1)*8 rather than (257-1)*8. */
|
|
-/* { dg-final { scan-assembler-not {, 2048} } } */
|
|
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
|
|
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */
|
|
-/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
|
|
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
|
|
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
|
|
-/* Two range checks and a check for n being zero. */
|
|
-/* { dg-final { scan-assembler {\tcmp\t} } } */
|
|
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
|
|
+/* Should use a WAR check that multiplies by (VF-2)*8 rather than
|
|
+ an overlap check that multiplies by (257-1)*4. */
|
|
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #16\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
|
|
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
|
|
+/* One range check and a check for n being zero. */
|
|
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
|
|
index 00d84760a19..b38f23e87ba 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
|
|
@@ -98,24 +98,24 @@ TEST_CMP (nugt)
|
|
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
|
|
|
|
/* 5 for lt, 5 for ult and 5 for nult. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
|
|
|
|
/* 5 for le, 5 for ule and 5 for nule. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
|
|
|
|
/* 5 for gt, 5 for ugt and 5 for nugt. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
|
|
|
|
/* 5 for ge, 5 for uge and 5 for nuge. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
|
|
|
|
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
|
|
/* 3 loops * 5 invocations for all 12 unordered comparisons. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
|
|
|
|
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
|
|
/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
|
|
@@ -123,19 +123,19 @@ TEST_CMP (nugt)
|
|
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
|
|
/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
|
|
|
|
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
|
|
/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
|
|
for all 12 unordered comparisons. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
|
|
index 23bfb7b2649..2f16fbff522 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
|
|
@@ -19,16 +19,16 @@
|
|
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
|
|
|
|
/* 5 for le, 5 for ule and 5 for nule. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
|
|
|
|
/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */
|
|
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
|
|
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
|
|
|
|
/* 5 for ge, 5 for uge and 5 for nuge. */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
|
|
|
|
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
|
|
/* 3 loops * 5 invocations for ordered, unordered amd ueq. */
|
|
@@ -43,14 +43,14 @@
|
|
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
|
|
/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
|
|
|
|
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
|
|
/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
|
|
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
|
|
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
|
|
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
|
|
|
|
/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
|
|
/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c
|
|
new file mode 100644
|
|
index 00000000000..fe490cfbf3f
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int64_t *x, int64_t *y, int32_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] += y[i];
|
|
+ z[i] += z[i - 2];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2s,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
|
|
new file mode 100644
|
|
index 00000000000..81e77a8bb04
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int16_t *x, int16_t *y, uint8_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 8];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
|
|
new file mode 100644
|
|
index 00000000000..d9da6c1f12a
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int32_t *x, int64_t *y, int64_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 2];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
|
|
new file mode 100644
|
|
index 00000000000..80dab8bf55f
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int16_t *x, int32_t *y, int32_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 4];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
|
|
new file mode 100644
|
|
index 00000000000..655fa7d4bf1
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int8_t *x, int16_t *y, int16_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 8];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c
|
|
new file mode 100644
|
|
index 00000000000..1fe69cad259
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int32_t *x, int32_t *y, int16_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] += y[i];
|
|
+ z[i] += z[i - 4];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4h,} 1 } } */
|
|
+/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.2s,} } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c
|
|
new file mode 100644
|
|
index 00000000000..1290772216e
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int16_t *x, int16_t *y, int8_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] += y[i];
|
|
+ z[i] += z[i - 8];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
|
|
+/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.4h,} } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c
|
|
new file mode 100644
|
|
index 00000000000..768ea8c7164
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int64_t *x, int64_t *y, int8_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] += y[i];
|
|
+ z[i] += z[i - 8];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 4 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
|
|
new file mode 100644
|
|
index 00000000000..ca8a65a16e7
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int64_t *x, int64_t *y, int32_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 2];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
|
|
new file mode 100644
|
|
index 00000000000..6c09b5b146b
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int32_t *x, int32_t *y, int16_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 4];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
|
|
new file mode 100644
|
|
index 00000000000..94a66c545ef
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int16_t *x, int16_t *y, int8_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 8];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
|
|
new file mode 100644
|
|
index 00000000000..9531966c294
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int64_t *x, int64_t *y, uint32_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 2];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
|
|
new file mode 100644
|
|
index 00000000000..de8f6988685
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
|
+
|
|
+#pragma GCC target "+nosve"
|
|
+
|
|
+#include <stdint.h>
|
|
+
|
|
+void
|
|
+f (int32_t *x, int32_t *y, uint16_t *z, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ x[i] = z[i];
|
|
+ y[i] += y[i - 4];
|
|
+ }
|
|
+}
|
|
+
|
|
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
|
|
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
|
|
index ae2f8611ea6..9d926ca5dfe 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -mavx -mtune=generic -dp" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
|
|
index 2a105601c71..51765900fcf 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
|
|
/* { dg-require-effective-target avx2 } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
|
|
index a26aa6529e8..4de04511934 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */ /* PR59617 */
|
|
/* { dg-options "-O3 -mavx512f -fdump-tree-vect-details -mtune=knl" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
|
|
index 2bb9c5c090b..946117d9d30 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -mavx512f -mtune=knl" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
|
|
new file mode 100644
|
|
index 00000000000..235fb917e17
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
|
|
@@ -0,0 +1,35 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
|
|
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
|
|
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
|
|
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
|
|
+
|
|
+#define N 1024
|
|
+int a[N];
|
|
+
|
|
+void
|
|
+f1 (void)
|
|
+{
|
|
+ int i;
|
|
+ #pragma omp simd simdlen (4)
|
|
+ for (i = 0; i < N; ++i)
|
|
+ a[i] = a[i] + 1;
|
|
+}
|
|
+
|
|
+void
|
|
+f2 (void)
|
|
+{
|
|
+ int i;
|
|
+ #pragma omp simd simdlen (8)
|
|
+ for (i = 0; i < N; ++i)
|
|
+ a[i] = a[i] + 2;
|
|
+}
|
|
+
|
|
+void
|
|
+f3 (void)
|
|
+{
|
|
+ int i;
|
|
+ #pragma omp simd simdlen (16)
|
|
+ for (i = 0; i < N; ++i)
|
|
+ a[i] = a[i] + 3;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
|
|
index e5bcdabcf79..2472fb016ee 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
|
|
index dbd078abc81..3d569733b1e 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
|
|
index d0844f208e5..8e5ec4150cc 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
|
|
index b9498a0ff13..0d2a0408d0b 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
|
|
index 0292ba040a3..fcf1a6ceac1 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
|
|
index a716006eda8..650e608117f 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
|
|
index b386b83e39a..c29198ba666 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
|
|
index 81193b2d8b1..cb38b77344f 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
|
|
index d86cb904357..10a350e9e10 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
|
|
index 68ca8388d70..020e5d86f35 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
|
|
index 4db4749c024..3ff23c17aab 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
|
|
index 0b86e6256bd..34671baa28a 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/i386/mask-pack.c b/gcc/testsuite/gcc.target/i386/mask-pack.c
|
|
index 0b564ef4284..a607dfb460c 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/mask-pack.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/mask-pack.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-mavx512bw -O3 -fopenmp-simd -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/mask-unpack.c b/gcc/testsuite/gcc.target/i386/mask-unpack.c
|
|
index 4291480cfff..ca71ea2e29d 100644
|
|
--- a/gcc/testsuite/gcc.target/i386/mask-unpack.c
|
|
+++ b/gcc/testsuite/gcc.target/i386/mask-unpack.c
|
|
@@ -1,3 +1,5 @@
|
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-mavx512bw -mavx512dq -mno-stackrealign -O3 -fopenmp-simd -fdump-tree-vect-details" } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr90358.c b/gcc/testsuite/gcc.target/i386/pr90358.c
|
|
new file mode 100644
|
|
index 00000000000..4894fdbd079
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr90358.c
|
|
@@ -0,0 +1,35 @@
|
|
+/* PR target/90358 */
|
|
+/* { dg-do run { target { sse4_runtime } } } */
|
|
+/* { dg-options "-O3 -msse4" } */
|
|
+
|
|
+struct s { unsigned int a, b, c; };
|
|
+
|
|
+void __attribute__ ((noipa))
|
|
+foo (struct s *restrict s1, struct s *restrict s2, int n)
|
|
+{
|
|
+ for (int i = 0; i < n; ++i)
|
|
+ {
|
|
+ s1[i].b = s2[i].b;
|
|
+ s1[i].c = s2[i].c;
|
|
+ s2[i].c = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+#define N 12
|
|
+
|
|
+int
|
|
+main ()
|
|
+{
|
|
+ struct s s1[N], s2[N];
|
|
+ for (unsigned int j = 0; j < N; ++j)
|
|
+ {
|
|
+ s2[j].a = j * 5;
|
|
+ s2[j].b = j * 5 + 2;
|
|
+ s2[j].c = j * 5 + 4;
|
|
+ }
|
|
+ foo (s1, s2, N);
|
|
+ for (unsigned int j = 0; j < N; ++j)
|
|
+ if (s1[j].b != j * 5 + 2)
|
|
+ __builtin_abort ();
|
|
+ return 0;
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.target/i386/pr91033.c b/gcc/testsuite/gcc.target/i386/pr91033.c
|
|
new file mode 100644
|
|
index 00000000000..43d99d5a7dc
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr91033.c
|
|
@@ -0,0 +1,15 @@
|
|
+/* PR tree-optimization/91033 */
|
|
+/* { dg-do compile { target pthread } } */
|
|
+/* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } */
|
|
+
|
|
+#define N 1024
|
|
+int a[N];
|
|
+
|
|
+void
|
|
+foo (void)
|
|
+{
|
|
+ int i;
|
|
+ #pragma omp simd simdlen (4)
|
|
+ for (i = 0; i < N; ++i)
|
|
+ a[i] = a[i] + 1;
|
|
+}
|
|
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-4.f90 b/gcc/testsuite/gfortran.dg/vect/vect-4.f90
|
|
index b567cbd8644..c2eeafd3900 100644
|
|
--- a/gcc/testsuite/gfortran.dg/vect/vect-4.f90
|
|
+++ b/gcc/testsuite/gfortran.dg/vect/vect-4.f90
|
|
@@ -1,3 +1,5 @@
|
|
+! Disabling epilogues until we find a better way to deal with scans.
|
|
+! { dg-additional-options "--param vect-epilogues-nomask=0" }
|
|
! { dg-do compile }
|
|
! { dg-require-effective-target vect_float }
|
|
! { dg-additional-options "--param vect-max-peeling-for-alignment=0" }
|
|
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
|
|
index 0ac5f1c390b..1c243308476 100644
|
|
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
|
|
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
|
|
@@ -704,5 +704,6 @@ CALL track('KERNEL ')
|
|
RETURN
|
|
END SUBROUTINE kernel
|
|
|
|
-! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target vect_intdouble_cvt } } }
|
|
-! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { ! vect_intdouble_cvt } } } }
|
|
+! { dg-final { scan-tree-dump-times "vectorized 23 loops" 1 "vect" { target aarch64*-*-* } } }
|
|
+! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
|
|
+! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
|
|
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
|
|
index f8aeec9bae8..621c8ea3dad 100644
|
|
--- a/gcc/tree-cfg.c
|
|
+++ b/gcc/tree-cfg.c
|
|
@@ -3557,6 +3557,24 @@ verify_gimple_assign_unary (gassign *stmt)
|
|
{
|
|
CASE_CONVERT:
|
|
{
|
|
+ /* Allow conversions between vectors with the same number of elements,
|
|
+ provided that the conversion is OK for the element types too. */
|
|
+ if (VECTOR_TYPE_P (lhs_type)
|
|
+ && VECTOR_TYPE_P (rhs1_type)
|
|
+ && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
|
|
+ TYPE_VECTOR_SUBPARTS (rhs1_type)))
|
|
+ {
|
|
+ lhs_type = TREE_TYPE (lhs_type);
|
|
+ rhs1_type = TREE_TYPE (rhs1_type);
|
|
+ }
|
|
+ else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
|
|
+ {
|
|
+ error ("invalid vector types in nop conversion");
|
|
+ debug_generic_expr (lhs_type);
|
|
+ debug_generic_expr (rhs1_type);
|
|
+ return true;
|
|
+ }
|
|
+
|
|
/* Allow conversions from pointer type to integral type only if
|
|
there is no sign or zero extension involved.
|
|
For targets were the precision of ptrofftype doesn't match that
|
|
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
|
|
index d00c1bd31e6..c95dd204870 100644
|
|
--- a/gcc/tree-data-ref.c
|
|
+++ b/gcc/tree-data-ref.c
|
|
@@ -1287,7 +1287,7 @@ create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
|
|
return dr;
|
|
}
|
|
|
|
-/* A helper function computes order between two tree epxressions T1 and T2.
|
|
+/* A helper function computes order between two tree expressions T1 and T2.
|
|
This is used in comparator functions sorting objects based on the order
|
|
of tree expressions. The function returns -1, 0, or 1. */
|
|
|
|
@@ -1454,6 +1454,54 @@ comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
|
|
return 0;
|
|
}
|
|
|
|
+/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */
|
|
+
|
|
+static void
|
|
+dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
|
|
+{
|
|
+ dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
|
|
+ DR_REF (alias_pair->first.dr),
|
|
+ DR_REF (alias_pair->second.dr));
|
|
+
|
|
+ dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
|
|
+ alias_pair->first.seg_len);
|
|
+ if (!operand_equal_p (alias_pair->first.seg_len,
|
|
+ alias_pair->second.seg_len, 0))
|
|
+ dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
|
|
+
|
|
+ dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
|
|
+ dump_dec (MSG_NOTE, alias_pair->first.access_size);
|
|
+ if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
|
|
+ {
|
|
+ dump_printf (MSG_NOTE, " vs. ");
|
|
+ dump_dec (MSG_NOTE, alias_pair->second.access_size);
|
|
+ }
|
|
+
|
|
+ dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
|
|
+ alias_pair->first.align);
|
|
+ if (alias_pair->first.align != alias_pair->second.align)
|
|
+ dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
|
|
+
|
|
+ dump_printf (MSG_NOTE, "\n%sflags: ", indent);
|
|
+ if (alias_pair->flags & DR_ALIAS_RAW)
|
|
+ dump_printf (MSG_NOTE, " RAW");
|
|
+ if (alias_pair->flags & DR_ALIAS_WAR)
|
|
+ dump_printf (MSG_NOTE, " WAR");
|
|
+ if (alias_pair->flags & DR_ALIAS_WAW)
|
|
+ dump_printf (MSG_NOTE, " WAW");
|
|
+ if (alias_pair->flags & DR_ALIAS_ARBITRARY)
|
|
+ dump_printf (MSG_NOTE, " ARBITRARY");
|
|
+ if (alias_pair->flags & DR_ALIAS_SWAPPED)
|
|
+ dump_printf (MSG_NOTE, " SWAPPED");
|
|
+ if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
|
|
+ dump_printf (MSG_NOTE, " UNSWAPPED");
|
|
+ if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
|
|
+ dump_printf (MSG_NOTE, " MIXED_STEPS");
|
|
+ if (alias_pair->flags == 0)
|
|
+ dump_printf (MSG_NOTE, " <none>");
|
|
+ dump_printf (MSG_NOTE, "\n");
|
|
+}
|
|
+
|
|
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
|
|
FACTOR is number of iterations that each data reference is accessed.
|
|
|
|
@@ -1488,19 +1536,50 @@ void
|
|
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
poly_uint64)
|
|
{
|
|
+ if (alias_pairs->is_empty ())
|
|
+ return;
|
|
+
|
|
+ /* Canonicalize each pair so that the base components are ordered wrt
|
|
+ data_ref_compare_tree. This allows the loop below to merge more
|
|
+ cases. */
|
|
+ unsigned int i;
|
|
+ dr_with_seg_len_pair_t *alias_pair;
|
|
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
|
|
+ {
|
|
+ data_reference_p dr_a = alias_pair->first.dr;
|
|
+ data_reference_p dr_b = alias_pair->second.dr;
|
|
+ int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
|
|
+ DR_BASE_ADDRESS (dr_b));
|
|
+ if (comp_res == 0)
|
|
+ comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
|
|
+ if (comp_res == 0)
|
|
+ comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
|
|
+ if (comp_res > 0)
|
|
+ {
|
|
+ std::swap (alias_pair->first, alias_pair->second);
|
|
+ alias_pair->flags |= DR_ALIAS_SWAPPED;
|
|
+ }
|
|
+ else
|
|
+ alias_pair->flags |= DR_ALIAS_UNSWAPPED;
|
|
+ }
|
|
+
|
|
/* Sort the collected data ref pairs so that we can scan them once to
|
|
combine all possible aliasing checks. */
|
|
alias_pairs->qsort (comp_dr_with_seg_len_pair);
|
|
|
|
/* Scan the sorted dr pairs and check if we can combine alias checks
|
|
of two neighboring dr pairs. */
|
|
- for (size_t i = 1; i < alias_pairs->length (); ++i)
|
|
+ unsigned int last = 0;
|
|
+ for (i = 1; i < alias_pairs->length (); ++i)
|
|
{
|
|
/* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
|
|
- dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
|
|
- *dr_b1 = &(*alias_pairs)[i-1].second,
|
|
- *dr_a2 = &(*alias_pairs)[i].first,
|
|
- *dr_b2 = &(*alias_pairs)[i].second;
|
|
+ dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
|
|
+ dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
|
|
+
|
|
+ dr_with_seg_len *dr_a1 = &alias_pair1->first;
|
|
+ dr_with_seg_len *dr_b1 = &alias_pair1->second;
|
|
+ dr_with_seg_len *dr_a2 = &alias_pair2->first;
|
|
+ dr_with_seg_len *dr_b2 = &alias_pair2->second;
|
|
|
|
/* Remove duplicate data ref pairs. */
|
|
if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
|
|
@@ -1509,10 +1588,16 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
|
|
DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
|
|
DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
|
|
- alias_pairs->ordered_remove (i--);
|
|
+ alias_pair1->flags |= alias_pair2->flags;
|
|
continue;
|
|
}
|
|
|
|
+ /* Assume that we won't be able to merge the pairs, then correct
|
|
+ if we do. */
|
|
+ last += 1;
|
|
+ if (last != i)
|
|
+ (*alias_pairs)[last] = (*alias_pairs)[i];
|
|
+
|
|
if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
|
|
{
|
|
/* We consider the case that DR_B1 and DR_B2 are same memrefs,
|
|
@@ -1538,13 +1623,6 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
if (!ordered_p (init_a1, init_a2))
|
|
continue;
|
|
|
|
- /* Make sure dr_a1 starts left of dr_a2. */
|
|
- if (maybe_gt (init_a1, init_a2))
|
|
- {
|
|
- std::swap (*dr_a1, *dr_a2);
|
|
- std::swap (init_a1, init_a2);
|
|
- }
|
|
-
|
|
/* Work out what the segment length would be if we did combine
|
|
DR_A1 and DR_A2:
|
|
|
|
@@ -1561,7 +1639,10 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
|
|
The lengths both have sizetype, so the sign is taken from
|
|
the step instead. */
|
|
- if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
|
|
+ poly_uint64 new_seg_len = 0;
|
|
+ bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
|
|
+ dr_a2->seg_len, 0);
|
|
+ if (new_seg_len_p)
|
|
{
|
|
poly_uint64 seg_len_a1, seg_len_a2;
|
|
if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
|
|
@@ -1579,14 +1660,29 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
int sign_a = tree_int_cst_sgn (indicator_a);
|
|
int sign_b = tree_int_cst_sgn (indicator_b);
|
|
|
|
- poly_uint64 new_seg_len;
|
|
if (sign_a <= 0 && sign_b <= 0)
|
|
new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
|
|
else if (sign_a >= 0 && sign_b >= 0)
|
|
new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
|
|
else
|
|
continue;
|
|
+ }
|
|
+ /* At this point we're committed to merging the refs. */
|
|
|
|
+ /* Make sure dr_a1 starts left of dr_a2. */
|
|
+ if (maybe_gt (init_a1, init_a2))
|
|
+ {
|
|
+ std::swap (*dr_a1, *dr_a2);
|
|
+ std::swap (init_a1, init_a2);
|
|
+ }
|
|
+
|
|
+ /* The DR_Bs are equal, so only the DR_As can introduce
|
|
+ mixed steps. */
|
|
+ if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
|
|
+ alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
|
|
+
|
|
+ if (new_seg_len_p)
|
|
+ {
|
|
dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
|
|
new_seg_len);
|
|
dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
|
|
@@ -1608,17 +1704,40 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
|
|
DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
|
|
DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
|
|
- alias_pairs->ordered_remove (i);
|
|
- i--;
|
|
+ alias_pair1->flags |= alias_pair2->flags;
|
|
+ last -= 1;
|
|
}
|
|
}
|
|
+ alias_pairs->truncate (last + 1);
|
|
+
|
|
+ /* Try to restore the original dr_with_seg_len order within each
|
|
+ dr_with_seg_len_pair_t. If we ended up combining swapped and
|
|
+ unswapped pairs into the same check, we have to invalidate any
|
|
+ RAW, WAR and WAW information for it. */
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf (MSG_NOTE, "merged alias checks:\n");
|
|
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
|
|
+ {
|
|
+ unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
|
|
+ unsigned int swapped = (alias_pair->flags & swap_mask);
|
|
+ if (swapped == DR_ALIAS_SWAPPED)
|
|
+ std::swap (alias_pair->first, alias_pair->second);
|
|
+ else if (swapped != DR_ALIAS_UNSWAPPED)
|
|
+ alias_pair->flags |= DR_ALIAS_ARBITRARY;
|
|
+ alias_pair->flags &= ~swap_mask;
|
|
+ if (dump_enabled_p ())
|
|
+ dump_alias_pair (alias_pair, " ");
|
|
+ }
|
|
}
|
|
|
|
-/* Given LOOP's two data references and segment lengths described by DR_A
|
|
- and DR_B, create expression checking if the two addresses ranges intersect
|
|
- with each other based on index of the two addresses. This can only be
|
|
- done if DR_A and DR_B referring to the same (array) object and the index
|
|
- is the only difference. For example:
|
|
+/* Try to generate a runtime condition that is true if ALIAS_PAIR is
|
|
+ free of aliases, using a condition based on index values instead
|
|
+ of a condition based on addresses. Return true on success,
|
|
+ storing the condition in *COND_EXPR.
|
|
+
|
|
+ This can only be done if the two data references in ALIAS_PAIR access
|
|
+ the same array object and the index is the only difference. For example,
|
|
+ if the two data references are DR_A and DR_B:
|
|
|
|
DR_A DR_B
|
|
data-ref arr[i] arr[j]
|
|
@@ -1635,16 +1754,20 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|
|
|
We can create expression based on index rather than address:
|
|
|
|
- (i_0 + 4 < j_0 || j_0 + 4 < i_0)
|
|
+ (unsigned) (i_0 - j_0 + 3) <= 6
|
|
+
|
|
+ i.e. the indices are less than 4 apart.
|
|
|
|
Note evolution step of index needs to be considered in comparison. */
|
|
|
|
static bool
|
|
create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
|
|
- const dr_with_seg_len& dr_a,
|
|
- const dr_with_seg_len& dr_b)
|
|
+ const dr_with_seg_len_pair_t &alias_pair)
|
|
{
|
|
- if (integer_zerop (DR_STEP (dr_a.dr))
|
|
+ const dr_with_seg_len &dr_a = alias_pair.first;
|
|
+ const dr_with_seg_len &dr_b = alias_pair.second;
|
|
+ if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
|
|
+ || integer_zerop (DR_STEP (dr_a.dr))
|
|
|| integer_zerop (DR_STEP (dr_b.dr))
|
|
|| DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
|
|
return false;
|
|
@@ -1670,15 +1793,8 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
|
|
if (neg_step)
|
|
{
|
|
abs_step = -abs_step;
|
|
- seg_len1 = -seg_len1;
|
|
- seg_len2 = -seg_len2;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Include the access size in the length, so that we only have one
|
|
- tree addition below. */
|
|
- seg_len1 += dr_a.access_size;
|
|
- seg_len2 += dr_b.access_size;
|
|
+ seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
|
|
+ seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
|
|
}
|
|
|
|
/* Infer the number of iterations with which the memory segment is accessed
|
|
@@ -1692,16 +1808,15 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
|
|
|| !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
|
|
return false;
|
|
|
|
- poly_uint64 niter_access1 = 0, niter_access2 = 0;
|
|
- if (neg_step)
|
|
- {
|
|
- /* Divide each access size by the byte step, rounding up. */
|
|
- if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
|
|
- abs_step, &niter_access1)
|
|
- || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
|
|
- abs_step, &niter_access2))
|
|
- return false;
|
|
- }
|
|
+ /* Divide each access size by the byte step, rounding up. */
|
|
+ poly_uint64 niter_access1, niter_access2;
|
|
+ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
|
|
+ abs_step, &niter_access1)
|
|
+ || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
|
|
+ abs_step, &niter_access2))
|
|
+ return false;
|
|
+
|
|
+ bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
|
|
|
|
unsigned int i;
|
|
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
|
|
@@ -1741,44 +1856,298 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
|
|
index of data reference. Like segment length, index length is
|
|
linear function of the number of iterations with index_step as
|
|
the coefficient, i.e, niter_len * idx_step. */
|
|
- tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
|
|
- build_int_cst (TREE_TYPE (min1),
|
|
- niter_len1));
|
|
- tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
|
|
- build_int_cst (TREE_TYPE (min2),
|
|
- niter_len2));
|
|
- tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
|
|
- tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
|
|
- /* Adjust ranges for negative step. */
|
|
+ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
|
|
+ SIGNED);
|
|
if (neg_step)
|
|
- {
|
|
- /* IDX_LEN1 and IDX_LEN2 are negative in this case. */
|
|
- std::swap (min1, max1);
|
|
- std::swap (min2, max2);
|
|
-
|
|
- /* As with the lengths just calculated, we've measured the access
|
|
- sizes in iterations, so multiply them by the index step. */
|
|
- tree idx_access1
|
|
- = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
|
|
- build_int_cst (TREE_TYPE (min1), niter_access1));
|
|
- tree idx_access2
|
|
- = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
|
|
- build_int_cst (TREE_TYPE (min2), niter_access2));
|
|
-
|
|
- /* MINUS_EXPR because the above values are negative. */
|
|
- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
|
|
- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
|
|
- }
|
|
- tree part_cond_expr
|
|
- = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
|
|
- fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
|
|
- fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
|
|
+ abs_idx_step = -abs_idx_step;
|
|
+ poly_offset_int idx_len1 = abs_idx_step * niter_len1;
|
|
+ poly_offset_int idx_len2 = abs_idx_step * niter_len2;
|
|
+ poly_offset_int idx_access1 = abs_idx_step * niter_access1;
|
|
+ poly_offset_int idx_access2 = abs_idx_step * niter_access2;
|
|
+
|
|
+ gcc_assert (known_ge (idx_len1, 0)
|
|
+ && known_ge (idx_len2, 0)
|
|
+ && known_ge (idx_access1, 0)
|
|
+ && known_ge (idx_access2, 0));
|
|
+
|
|
+ /* Each access has the following pattern, with lengths measured
|
|
+ in units of INDEX:
|
|
+
|
|
+ <-- idx_len -->
|
|
+ <--- A: -ve step --->
|
|
+ +-----+-------+-----+-------+-----+
|
|
+ | n-1 | ..... | 0 | ..... | n-1 |
|
|
+ +-----+-------+-----+-------+-----+
|
|
+ <--- B: +ve step --->
|
|
+ <-- idx_len -->
|
|
+ |
|
|
+ min
|
|
+
|
|
+ where "n" is the number of scalar iterations covered by the segment
|
|
+ and where each access spans idx_access units.
|
|
+
|
|
+ A is the range of bytes accessed when the step is negative,
|
|
+ B is the range when the step is positive.
|
|
+
|
|
+ When checking for general overlap, we need to test whether
|
|
+ the range:
|
|
+
|
|
+ [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
|
|
+
|
|
+ overlaps:
|
|
+
|
|
+ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
|
|
+
|
|
+ where:
|
|
+
|
|
+ low_offsetN = +ve step ? 0 : -idx_lenN;
|
|
+ high_offsetN = +ve step ? idx_lenN : 0;
|
|
+
|
|
+ This is equivalent to testing whether:
|
|
+
|
|
+ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
|
|
+ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
|
|
+
|
|
+ Converting this into a single test, there is an overlap if:
|
|
+
|
|
+ 0 <= min2 - min1 + bias <= limit
|
|
+
|
|
+ where bias = high_offset2 + idx_access2 - 1 - low_offset1
|
|
+ limit = (high_offset1 - low_offset1 + idx_access1 - 1)
|
|
+ + (high_offset2 - low_offset2 + idx_access2 - 1)
|
|
+ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
|
|
+
|
|
+ Combining the tests requires limit to be computable in an unsigned
|
|
+ form of the index type; if it isn't, we fall back to the usual
|
|
+ pointer-based checks.
|
|
+
|
|
+ We can do better if DR_B is a write and if DR_A and DR_B are
|
|
+ well-ordered in both the original and the new code (see the
|
|
+ comment above the DR_ALIAS_* flags for details). In this case
|
|
+ we know that for each i in [0, n-1], the write performed by
|
|
+ access i of DR_B occurs after access numbers j<=i of DR_A in
|
|
+ both the original and the new code. Any write or anti
|
|
+ dependencies wrt those DR_A accesses are therefore maintained.
|
|
+
|
|
+ We just need to make sure that each individual write in DR_B does not
|
|
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
|
|
+ after the DR_B access in the original code but happen before it in
|
|
+ the new code.
|
|
+
|
|
+ We know the steps for both accesses are equal, so by induction, we
|
|
+ just need to test whether the first write of DR_B overlaps a later
|
|
+ access of DR_A. In other words, we need to move min1 along by
|
|
+ one iteration:
|
|
+
|
|
+ min1' = min1 + idx_step
|
|
+
|
|
+ and use the ranges:
|
|
+
|
|
+ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
|
|
+
|
|
+ and:
|
|
+
|
|
+ [min2, min2 + idx_access2 - 1]
|
|
+
|
|
+ where:
|
|
+
|
|
+ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
|
|
+ high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */
|
|
+ if (waw_or_war_p)
|
|
+ idx_len1 -= abs_idx_step;
|
|
+
|
|
+ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
|
|
+ if (!waw_or_war_p)
|
|
+ limit += idx_len2;
|
|
+
|
|
+ tree utype = unsigned_type_for (TREE_TYPE (min1));
|
|
+ if (!wi::fits_to_tree_p (limit, utype))
|
|
+ return false;
|
|
+
|
|
+ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
|
|
+ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
|
|
+ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
|
|
+ /* Equivalent to adding IDX_STEP to MIN1. */
|
|
+ if (waw_or_war_p)
|
|
+ bias -= wi::to_offset (idx_step);
|
|
+
|
|
+ tree subject = fold_build2 (MINUS_EXPR, utype,
|
|
+ fold_convert (utype, min2),
|
|
+ fold_convert (utype, min1));
|
|
+ subject = fold_build2 (PLUS_EXPR, utype, subject,
|
|
+ wide_int_to_tree (utype, bias));
|
|
+ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
|
|
+ wide_int_to_tree (utype, limit));
|
|
if (*cond_expr)
|
|
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
|
|
*cond_expr, part_cond_expr);
|
|
else
|
|
*cond_expr = part_cond_expr;
|
|
}
|
|
+ if (dump_enabled_p ())
|
|
+ {
|
|
+ if (waw_or_war_p)
|
|
+ dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
|
|
+ else
|
|
+ dump_printf (MSG_NOTE, "using an index-based overlap test\n");
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* A subroutine of create_intersect_range_checks, with a subset of the
|
|
+ same arguments. Try to optimize cases in which the second access
|
|
+ is a write and in which some overlap is valid. */
|
|
+
|
|
+static bool
|
|
+create_waw_or_war_checks (tree *cond_expr,
|
|
+ const dr_with_seg_len_pair_t &alias_pair)
|
|
+{
|
|
+ const dr_with_seg_len& dr_a = alias_pair.first;
|
|
+ const dr_with_seg_len& dr_b = alias_pair.second;
|
|
+
|
|
+ /* Check for cases in which:
|
|
+
|
|
+ (a) DR_B is always a write;
|
|
+ (b) the accesses are well-ordered in both the original and new code
|
|
+ (see the comment above the DR_ALIAS_* flags for details); and
|
|
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
|
|
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
|
|
+ return false;
|
|
+
|
|
+ /* Check for equal (but possibly variable) steps. */
|
|
+ tree step = DR_STEP (dr_a.dr);
|
|
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
|
|
+ return false;
|
|
+
|
|
+ /* Make sure that we can operate on sizetype without loss of precision. */
|
|
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
|
|
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
|
|
+ return false;
|
|
+
|
|
+ /* All addresses involved are known to have a common alignment ALIGN.
|
|
+ We can therefore subtract ALIGN from an exclusive endpoint to get
|
|
+ an inclusive endpoint. In the best (and common) case, ALIGN is the
|
|
+ same as the access sizes of both DRs, and so subtracting ALIGN
|
|
+ cancels out the addition of an access size. */
|
|
+ unsigned int align = MIN (dr_a.align, dr_b.align);
|
|
+ poly_uint64 last_chunk_a = dr_a.access_size - align;
|
|
+ poly_uint64 last_chunk_b = dr_b.access_size - align;
|
|
+
|
|
+ /* Get a boolean expression that is true when the step is negative. */
|
|
+ tree indicator = dr_direction_indicator (dr_a.dr);
|
|
+ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
|
|
+ fold_convert (ssizetype, indicator),
|
|
+ ssize_int (0));
|
|
+
|
|
+ /* Get lengths in sizetype. */
|
|
+ tree seg_len_a
|
|
+ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
|
|
+ step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
|
|
+
|
|
+ /* Each access has the following pattern:
|
|
+
|
|
+ <- |seg_len| ->
|
|
+ <--- A: -ve step --->
|
|
+ +-----+-------+-----+-------+-----+
|
|
+ | n-1 | ..... | 0 | ..... | n-1 |
|
|
+ +-----+-------+-----+-------+-----+
|
|
+ <--- B: +ve step --->
|
|
+ <- |seg_len| ->
|
|
+ |
|
|
+ base address
|
|
+
|
|
+ where "n" is the number of scalar iterations covered by the segment.
|
|
+
|
|
+ A is the range of bytes accessed when the step is negative,
|
|
+ B is the range when the step is positive.
|
|
+
|
|
+ We know that DR_B is a write. We also know (from checking that
|
|
+ DR_A and DR_B are well-ordered) that for each i in [0, n-1],
|
|
+ the write performed by access i of DR_B occurs after access numbers
|
|
+ j<=i of DR_A in both the original and the new code. Any write or
|
|
+ anti dependencies wrt those DR_A accesses are therefore maintained.
|
|
+
|
|
+ We just need to make sure that each individual write in DR_B does not
|
|
+ overlap any higher-indexed access in DR_A; such DR_A accesses happen
|
|
+ after the DR_B access in the original code but happen before it in
|
|
+ the new code.
|
|
+
|
|
+ We know the steps for both accesses are equal, so by induction, we
|
|
+ just need to test whether the first write of DR_B overlaps a later
|
|
+ access of DR_A. In other words, we need to move addr_a along by
|
|
+ one iteration:
|
|
+
|
|
+ addr_a' = addr_a + step
|
|
+
|
|
+ and check whether:
|
|
+
|
|
+ [addr_b, addr_b + last_chunk_b]
|
|
+
|
|
+ overlaps:
|
|
+
|
|
+ [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
|
|
+
|
|
+ where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.:
|
|
+
|
|
+ low_offset_a = +ve step ? 0 : seg_len_a - step
|
|
+ high_offset_a = +ve step ? seg_len_a - step : 0
|
|
+
|
|
+ This is equivalent to testing whether:
|
|
+
|
|
+ addr_a' + low_offset_a <= addr_b + last_chunk_b
|
|
+ && addr_b <= addr_a' + high_offset_a + last_chunk_a
|
|
+
|
|
+ Converting this into a single test, there is an overlap if:
|
|
+
|
|
+ 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
|
|
+
|
|
+ where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
|
|
+
|
|
+ If DR_A is performed, limit + |step| - last_chunk_b is known to be
|
|
+ less than the size of the object underlying DR_A. We also know
|
|
+ that last_chunk_b <= |step|; this is checked elsewhere if it isn't
|
|
+ guaranteed at compile time. There can therefore be no overflow if
|
|
+ "limit" is calculated in an unsigned type with pointer precision. */
|
|
+ tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
|
|
+ DR_OFFSET (dr_a.dr));
|
|
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
|
|
+
|
|
+ tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
|
|
+ DR_OFFSET (dr_b.dr));
|
|
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
|
|
+
|
|
+ /* Advance ADDR_A by one iteration and adjust the length to compensate. */
|
|
+ addr_a = fold_build_pointer_plus (addr_a, step);
|
|
+ tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
|
|
+ seg_len_a, step);
|
|
+ if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
|
|
+ seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
|
|
+
|
|
+ tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
|
|
+ seg_len_a_minus_step, size_zero_node);
|
|
+ if (!CONSTANT_CLASS_P (low_offset_a))
|
|
+ low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
|
|
+
|
|
+ /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
|
|
+ but it's usually more efficient to reuse the LOW_OFFSET_A result. */
|
|
+ tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
|
|
+ low_offset_a);
|
|
+
|
|
+ /* The amount added to addr_b - addr_a'. */
|
|
+ tree bias = fold_build2 (MINUS_EXPR, sizetype,
|
|
+ size_int (last_chunk_b), low_offset_a);
|
|
+
|
|
+ tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
|
|
+ limit = fold_build2 (PLUS_EXPR, sizetype, limit,
|
|
+ size_int (last_chunk_a + last_chunk_b));
|
|
+
|
|
+ tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
|
|
+ subject = fold_build2 (PLUS_EXPR, sizetype,
|
|
+ fold_convert (sizetype, subject), bias);
|
|
+
|
|
+ *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
|
|
return true;
|
|
}
|
|
|
|
@@ -1866,24 +2235,29 @@ get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
|
|
*seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
|
|
}
|
|
|
|
-/* Given two data references and segment lengths described by DR_A and DR_B,
|
|
- create expression checking if the two addresses ranges intersect with
|
|
- each other:
|
|
+/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
|
|
+ storing the condition in *COND_EXPR. The fallback is to generate a
|
|
+ a test that the two accesses do not overlap:
|
|
|
|
- ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
|
|
- || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */
|
|
+ end_a <= start_b || end_b <= start_a. */
|
|
|
|
static void
|
|
create_intersect_range_checks (struct loop *loop, tree *cond_expr,
|
|
- const dr_with_seg_len& dr_a,
|
|
- const dr_with_seg_len& dr_b)
|
|
+ const dr_with_seg_len_pair_t &alias_pair)
|
|
{
|
|
+ const dr_with_seg_len& dr_a = alias_pair.first;
|
|
+ const dr_with_seg_len& dr_b = alias_pair.second;
|
|
*cond_expr = NULL_TREE;
|
|
- if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
|
|
+ if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
|
|
+ return;
|
|
+
|
|
+ if (create_waw_or_war_checks (cond_expr, alias_pair))
|
|
return;
|
|
|
|
unsigned HOST_WIDE_INT min_align;
|
|
tree_code cmp_code;
|
|
+ /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
|
|
+ are equivalent. This is just an optimization heuristic. */
|
|
if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
|
|
&& TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
|
|
{
|
|
@@ -1924,6 +2298,8 @@ create_intersect_range_checks (struct loop *loop, tree *cond_expr,
|
|
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
|
|
fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
|
|
fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf (MSG_NOTE, "using an address-based overlap test\n");
|
|
}
|
|
|
|
/* Create a conditional expression that represents the run-time checks for
|
|
@@ -1940,18 +2316,19 @@ create_runtime_alias_checks (struct loop *loop,
|
|
tree part_cond_expr;
|
|
|
|
fold_defer_overflow_warnings ();
|
|
- for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
|
|
+ dr_with_seg_len_pair_t *alias_pair;
|
|
+ unsigned int i;
|
|
+ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
|
|
{
|
|
- const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
|
|
- const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
|
|
-
|
|
+ gcc_assert (alias_pair->flags);
|
|
if (dump_enabled_p ())
|
|
dump_printf (MSG_NOTE,
|
|
"create runtime check for data references %T and %T\n",
|
|
- DR_REF (dr_a.dr), DR_REF (dr_b.dr));
|
|
+ DR_REF (alias_pair->first.dr),
|
|
+ DR_REF (alias_pair->second.dr));
|
|
|
|
/* Create condition expression for each pair data references. */
|
|
- create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
|
|
+ create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
|
|
if (*cond_expr)
|
|
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
|
|
*cond_expr, part_cond_expr);
|
|
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
|
|
index 70cbb03b49c..9cb48a2ea3e 100644
|
|
--- a/gcc/tree-data-ref.h
|
|
+++ b/gcc/tree-data-ref.h
|
|
@@ -221,19 +221,113 @@ struct dr_with_seg_len
|
|
unsigned int align;
|
|
};
|
|
|
|
+/* Flags that describe a potential alias between two dr_with_seg_lens.
|
|
+ In general, each pair of dr_with_seg_lens represents a composite of
|
|
+ multiple access pairs P, so testing flags like DR_IS_READ on the DRs
|
|
+ does not give meaningful information.
|
|
+
|
|
+ DR_ALIAS_RAW:
|
|
+ There is a pair in P for which the second reference is a read
|
|
+ and the first is a write.
|
|
+
|
|
+ DR_ALIAS_WAR:
|
|
+ There is a pair in P for which the second reference is a write
|
|
+ and the first is a read.
|
|
+
|
|
+ DR_ALIAS_WAW:
|
|
+ There is a pair in P for which both references are writes.
|
|
+
|
|
+ DR_ALIAS_ARBITRARY:
|
|
+ Either
|
|
+ (a) it isn't possible to classify one pair in P as RAW, WAW or WAR; or
|
|
+ (b) there is a pair in P that breaks the ordering assumption below.
|
|
+
|
|
+ This flag overrides the RAW, WAR and WAW flags above.
|
|
+
|
|
+ DR_ALIAS_UNSWAPPED:
|
|
+ DR_ALIAS_SWAPPED:
|
|
+ Temporary flags that indicate whether there is a pair P whose
|
|
+ DRs have or haven't been swapped around.
|
|
+
|
|
+ DR_ALIAS_MIXED_STEPS:
|
|
+ The DR_STEP for one of the data references in the pair does not
|
|
+ accurately describe that reference for all members of P. (Note
|
|
+ that the flag does not say anything about whether the DR_STEPs
|
|
+ of the two references in the pair are the same.)
|
|
+
|
|
+ The ordering assumption mentioned above is that for every pair
|
|
+ (DR_A, DR_B) in P:
|
|
+
|
|
+ (1) The original code accesses n elements for DR_A and n elements for DR_B,
|
|
+ interleaved as follows:
|
|
+
|
|
+ one access of size DR_A.access_size at DR_A.dr
|
|
+ one access of size DR_B.access_size at DR_B.dr
|
|
+ one access of size DR_A.access_size at DR_A.dr + STEP_A
|
|
+ one access of size DR_B.access_size at DR_B.dr + STEP_B
|
|
+ one access of size DR_A.access_size at DR_A.dr + STEP_A * 2
|
|
+ one access of size DR_B.access_size at DR_B.dr + STEP_B * 2
|
|
+ ...
|
|
+
|
|
+ (2) The new code accesses the same data in exactly two chunks:
|
|
+
|
|
+ one group of accesses spanning |DR_A.seg_len| + DR_A.access_size
|
|
+ one group of accesses spanning |DR_B.seg_len| + DR_B.access_size
|
|
+
|
|
+ A pair might break this assumption if the DR_A and DR_B accesses
|
|
+ in the original or the new code are mingled in some way. For example,
|
|
+ if DR_A.access_size represents the effect of two individual writes
|
|
+ to nearby locations, the pair breaks the assumption if those writes
|
|
+ occur either side of the access for DR_B.
|
|
+
|
|
+ Note that DR_ALIAS_ARBITRARY describes whether the ordering assumption
|
|
+ fails to hold for any individual pair in P. If the assumption *does*
|
|
+ hold for every pair in P, it doesn't matter whether it holds for the
|
|
+ composite pair or not. In other words, P should represent the complete
|
|
+ set of pairs that the composite pair is testing, so only the ordering
|
|
+ of two accesses in the same member of P matters. */
|
|
+const unsigned int DR_ALIAS_RAW = 1U << 0;
|
|
+const unsigned int DR_ALIAS_WAR = 1U << 1;
|
|
+const unsigned int DR_ALIAS_WAW = 1U << 2;
|
|
+const unsigned int DR_ALIAS_ARBITRARY = 1U << 3;
|
|
+const unsigned int DR_ALIAS_SWAPPED = 1U << 4;
|
|
+const unsigned int DR_ALIAS_UNSWAPPED = 1U << 5;
|
|
+const unsigned int DR_ALIAS_MIXED_STEPS = 1U << 6;
|
|
+
|
|
/* This struct contains two dr_with_seg_len objects with aliasing data
|
|
refs. Two comparisons are generated from them. */
|
|
|
|
struct dr_with_seg_len_pair_t
|
|
{
|
|
- dr_with_seg_len_pair_t (const dr_with_seg_len& d1,
|
|
- const dr_with_seg_len& d2)
|
|
- : first (d1), second (d2) {}
|
|
+ /* WELL_ORDERED indicates that the ordering assumption described above
|
|
+ DR_ALIAS_ARBITRARY holds. REORDERED indicates that it doesn't. */
|
|
+ enum sequencing { WELL_ORDERED, REORDERED };
|
|
+
|
|
+ dr_with_seg_len_pair_t (const dr_with_seg_len &,
|
|
+ const dr_with_seg_len &, sequencing);
|
|
|
|
dr_with_seg_len first;
|
|
dr_with_seg_len second;
|
|
+ unsigned int flags;
|
|
};
|
|
|
|
+inline dr_with_seg_len_pair_t::
|
|
+dr_with_seg_len_pair_t (const dr_with_seg_len &d1, const dr_with_seg_len &d2,
|
|
+ sequencing seq)
|
|
+ : first (d1), second (d2), flags (0)
|
|
+{
|
|
+ if (DR_IS_READ (d1.dr) && DR_IS_WRITE (d2.dr))
|
|
+ flags |= DR_ALIAS_WAR;
|
|
+ else if (DR_IS_WRITE (d1.dr) && DR_IS_READ (d2.dr))
|
|
+ flags |= DR_ALIAS_RAW;
|
|
+ else if (DR_IS_WRITE (d1.dr) && DR_IS_WRITE (d2.dr))
|
|
+ flags |= DR_ALIAS_WAW;
|
|
+ else
|
|
+ gcc_unreachable ();
|
|
+ if (seq == REORDERED)
|
|
+ flags |= DR_ALIAS_ARBITRARY;
|
|
+}
|
|
+
|
|
enum data_dependence_direction {
|
|
dir_positive,
|
|
dir_negative,
|
|
diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
|
|
index 2780a4b243f..bd946e14eb6 100644
|
|
--- a/gcc/tree-if-conv.c
|
|
+++ b/gcc/tree-if-conv.c
|
|
@@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "fold-const.h"
|
|
#include "tree-ssa-sccvn.h"
|
|
#include "tree-cfgcleanup.h"
|
|
+#include "tree-ssa-dse.h"
|
|
|
|
/* Only handle PHIs with no more arguments unless we are asked to by
|
|
simd pragma. */
|
|
@@ -2884,7 +2885,7 @@ ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
|
|
loop vectorization. */
|
|
|
|
static void
|
|
-ifcvt_local_dce (basic_block bb)
|
|
+ifcvt_local_dce (class loop *loop)
|
|
{
|
|
gimple *stmt;
|
|
gimple *stmt1;
|
|
@@ -2901,6 +2902,10 @@ ifcvt_local_dce (basic_block bb)
|
|
replace_uses_by (name_pair->first, name_pair->second);
|
|
redundant_ssa_names.release ();
|
|
|
|
+ /* The loop has a single BB only. */
|
|
+ basic_block bb = loop->header;
|
|
+ tree latch_vdef = NULL_TREE;
|
|
+
|
|
worklist.create (64);
|
|
/* Consider all phi as live statements. */
|
|
for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
@@ -2908,6 +2913,8 @@ ifcvt_local_dce (basic_block bb)
|
|
phi = gsi_stmt (gsi);
|
|
gimple_set_plf (phi, GF_PLF_2, true);
|
|
worklist.safe_push (phi);
|
|
+ if (virtual_operand_p (gimple_phi_result (phi)))
|
|
+ latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
|
|
}
|
|
/* Consider load/store statements, CALL and COND as live. */
|
|
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
@@ -2971,6 +2978,19 @@ ifcvt_local_dce (basic_block bb)
|
|
while (!gsi_end_p (gsi))
|
|
{
|
|
stmt = gsi_stmt (gsi);
|
|
+ if (gimple_store_p (stmt))
|
|
+ {
|
|
+ tree lhs = gimple_get_lhs (stmt);
|
|
+ ao_ref write;
|
|
+ ao_ref_init (&write, lhs);
|
|
+
|
|
+ if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
|
|
+ == DSE_STORE_DEAD)
|
|
+ delete_dead_or_redundant_assignment (&gsi, "dead");
|
|
+ gsi_next (&gsi);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
if (gimple_plf (stmt, GF_PLF_2))
|
|
{
|
|
gsi_next (&gsi);
|
|
@@ -3071,9 +3091,6 @@ tree_if_conversion (struct loop *loop, vec<gimple *> *preds)
|
|
on-the-fly. */
|
|
combine_blocks (loop);
|
|
|
|
- /* Delete dead predicate computations. */
|
|
- ifcvt_local_dce (loop->header);
|
|
-
|
|
/* Perform local CSE, this esp. helps the vectorizer analysis if loads
|
|
and stores are involved. CSE only the loop body, not the entry
|
|
PHIs, those are to be kept in sync with the non-if-converted copy.
|
|
@@ -3082,6 +3099,9 @@ tree_if_conversion (struct loop *loop, vec<gimple *> *preds)
|
|
bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
|
|
bitmap_set_bit (exit_bbs, loop->latch->index);
|
|
todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
|
|
+
|
|
+ /* Delete dead predicate computations. */
|
|
+ ifcvt_local_dce (loop);
|
|
BITMAP_FREE (exit_bbs);
|
|
|
|
todo |= TODO_cleanup_cfg;
|
|
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
|
|
index d115fcb1a5b..2fbcd6e3e46 100644
|
|
--- a/gcc/tree-inline.c
|
|
+++ b/gcc/tree-inline.c
|
|
@@ -6201,11 +6201,11 @@ tree_function_versioning (tree old_decl, tree new_decl,
|
|
in the debug info that var (whole DECL_ORIGIN is the parm
|
|
PARM_DECL) is optimized away, but could be looked up at the
|
|
call site as value of D#X there. */
|
|
- tree var = vars, vexpr;
|
|
+ tree vexpr;
|
|
gimple_stmt_iterator cgsi
|
|
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
|
gimple *def_temp;
|
|
- var = vars;
|
|
+ tree var = vars;
|
|
i = vec_safe_length (*debug_args);
|
|
do
|
|
{
|
|
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
|
index 8959f52a67b..a002bcd57b2 100644
|
|
--- a/gcc/tree-loop-distribution.c
|
|
+++ b/gcc/tree-loop-distribution.c
|
|
@@ -2445,12 +2445,6 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
|
|
struct data_reference *dr_a = DDR_A (ddr);
|
|
struct data_reference *dr_b = DDR_B (ddr);
|
|
tree seg_length_a, seg_length_b;
|
|
- int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
|
|
- DR_BASE_ADDRESS (dr_b));
|
|
-
|
|
- if (comp_res == 0)
|
|
- comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
|
|
- gcc_assert (comp_res != 0);
|
|
|
|
if (latch_dominated_by_data_ref (loop, dr_a))
|
|
seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
|
|
@@ -2471,11 +2465,9 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
|
|
|
|
dr_with_seg_len_pair_t dr_with_seg_len_pair
|
|
(dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
|
|
- dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
|
|
-
|
|
- /* Canonicalize pairs by sorting the two DR members. */
|
|
- if (comp_res > 0)
|
|
- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
|
|
+ dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
|
|
+ /* ??? Would WELL_ORDERED be safe? */
|
|
+ dr_with_seg_len_pair_t::REORDERED);
|
|
|
|
comp_alias_pairs->safe_push (dr_with_seg_len_pair);
|
|
}
|
|
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
|
|
index dad6e2884db..e841da66db5 100644
|
|
--- a/gcc/tree-parloops.c
|
|
+++ b/gcc/tree-parloops.c
|
|
@@ -88,7 +88,8 @@ along with GCC; see the file COPYING3. If not see
|
|
More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
|
|
/*
|
|
Reduction handling:
|
|
- currently we use vect_force_simple_reduction() to detect reduction patterns.
|
|
+ currently we use code inspired by vect_force_simple_reduction to detect
|
|
+ reduction patterns.
|
|
The code transformation will be introduced by an example.
|
|
|
|
|
|
@@ -182,6 +183,717 @@ parloop
|
|
|
|
*/
|
|
|
|
+/* Error reporting helper for parloops_is_simple_reduction below. GIMPLE
|
|
+ statement STMT is printed with a message MSG. */
|
|
+
|
|
+static void
|
|
+report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
|
|
+{
|
|
+ dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
|
|
+}
|
|
+
|
|
+/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
|
|
+ operation. Return true if the results of DEF_STMT_INFO are something
|
|
+ that can be accumulated by such a reduction. */
|
|
+
|
|
+static bool
|
|
+parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
|
|
+{
|
|
+ return (is_gimple_assign (def_stmt_info->stmt)
|
|
+ || is_gimple_call (def_stmt_info->stmt)
|
|
+ || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
|
|
+ || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
|
|
+ && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
|
|
+ && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
|
|
+}
|
|
+
|
|
+/* Detect SLP reduction of the form:
|
|
+
|
|
+ #a1 = phi <a5, a0>
|
|
+ a2 = operation (a1)
|
|
+ a3 = operation (a2)
|
|
+ a4 = operation (a3)
|
|
+ a5 = operation (a4)
|
|
+
|
|
+ #a = phi <a5>
|
|
+
|
|
+ PHI is the reduction phi node (#a1 = phi <a5, a0> above)
|
|
+ FIRST_STMT is the first reduction stmt in the chain
|
|
+ (a2 = operation (a1)).
|
|
+
|
|
+ Return TRUE if a reduction chain was detected. */
|
|
+
|
|
+static bool
|
|
+parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
|
|
+ gimple *first_stmt)
|
|
+{
|
|
+ class loop *loop = (gimple_bb (phi))->loop_father;
|
|
+ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
|
+ enum tree_code code;
|
|
+ gimple *loop_use_stmt = NULL;
|
|
+ stmt_vec_info use_stmt_info;
|
|
+ tree lhs;
|
|
+ imm_use_iterator imm_iter;
|
|
+ use_operand_p use_p;
|
|
+ int nloop_uses, size = 0, n_out_of_loop_uses;
|
|
+ bool found = false;
|
|
+
|
|
+ if (loop != vect_loop)
|
|
+ return false;
|
|
+
|
|
+ auto_vec<stmt_vec_info, 8> reduc_chain;
|
|
+ lhs = PHI_RESULT (phi);
|
|
+ code = gimple_assign_rhs_code (first_stmt);
|
|
+ while (1)
|
|
+ {
|
|
+ nloop_uses = 0;
|
|
+ n_out_of_loop_uses = 0;
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
|
|
+ {
|
|
+ gimple *use_stmt = USE_STMT (use_p);
|
|
+ if (is_gimple_debug (use_stmt))
|
|
+ continue;
|
|
+
|
|
+ /* Check if we got back to the reduction phi. */
|
|
+ if (use_stmt == phi)
|
|
+ {
|
|
+ loop_use_stmt = use_stmt;
|
|
+ found = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
|
+ {
|
|
+ loop_use_stmt = use_stmt;
|
|
+ nloop_uses++;
|
|
+ }
|
|
+ else
|
|
+ n_out_of_loop_uses++;
|
|
+
|
|
+ /* There are can be either a single use in the loop or two uses in
|
|
+ phi nodes. */
|
|
+ if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (found)
|
|
+ break;
|
|
+
|
|
+ /* We reached a statement with no loop uses. */
|
|
+ if (nloop_uses == 0)
|
|
+ return false;
|
|
+
|
|
+ /* This is a loop exit phi, and we haven't reached the reduction phi. */
|
|
+ if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
|
|
+ return false;
|
|
+
|
|
+ if (!is_gimple_assign (loop_use_stmt)
|
|
+ || code != gimple_assign_rhs_code (loop_use_stmt)
|
|
+ || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
|
|
+ return false;
|
|
+
|
|
+ /* Insert USE_STMT into reduction chain. */
|
|
+ use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
|
|
+ reduc_chain.safe_push (use_stmt_info);
|
|
+
|
|
+ lhs = gimple_assign_lhs (loop_use_stmt);
|
|
+ size++;
|
|
+ }
|
|
+
|
|
+ if (!found || loop_use_stmt != phi || size < 2)
|
|
+ return false;
|
|
+
|
|
+ /* Swap the operands, if needed, to make the reduction operand be the second
|
|
+ operand. */
|
|
+ lhs = PHI_RESULT (phi);
|
|
+ for (unsigned i = 0; i < reduc_chain.length (); ++i)
|
|
+ {
|
|
+ gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
|
|
+ if (gimple_assign_rhs2 (next_stmt) == lhs)
|
|
+ {
|
|
+ tree op = gimple_assign_rhs1 (next_stmt);
|
|
+ stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
|
|
+
|
|
+ /* Check that the other def is either defined in the loop
|
|
+ ("vect_internal_def"), or it's an induction (defined by a
|
|
+ loop-header phi-node). */
|
|
+ if (def_stmt_info
|
|
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
|
|
+ && parloops_valid_reduction_input_p (def_stmt_info))
|
|
+ {
|
|
+ lhs = gimple_assign_lhs (next_stmt);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ tree op = gimple_assign_rhs2 (next_stmt);
|
|
+ stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
|
|
+
|
|
+ /* Check that the other def is either defined in the loop
|
|
+ ("vect_internal_def"), or it's an induction (defined by a
|
|
+ loop-header phi-node). */
|
|
+ if (def_stmt_info
|
|
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
|
|
+ && parloops_valid_reduction_input_p (def_stmt_info))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
|
|
+ next_stmt);
|
|
+
|
|
+ swap_ssa_operands (next_stmt,
|
|
+ gimple_assign_rhs1_ptr (next_stmt),
|
|
+ gimple_assign_rhs2_ptr (next_stmt));
|
|
+ update_stmt (next_stmt);
|
|
+
|
|
+ if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
|
|
+ LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
|
|
+ }
|
|
+ else
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ lhs = gimple_assign_lhs (next_stmt);
|
|
+ }
|
|
+
|
|
+ /* Build up the actual chain. */
|
|
+ for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
|
|
+ {
|
|
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
|
|
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
|
|
+ }
|
|
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
|
|
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
|
|
+
|
|
+ /* Save the chain for further analysis in SLP detection. */
|
|
+ LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
|
|
+ REDUC_GROUP_SIZE (reduc_chain[0]) = size;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Return true if we need an in-order reduction for operation CODE
|
|
+ on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
|
|
+ overflow must wrap. */
|
|
+
|
|
+static bool
|
|
+parloops_needs_fold_left_reduction_p (tree type, tree_code code,
|
|
+ bool need_wrapping_integral_overflow)
|
|
+{
|
|
+ /* CHECKME: check for !flag_finite_math_only too? */
|
|
+ if (SCALAR_FLOAT_TYPE_P (type))
|
|
+ switch (code)
|
|
+ {
|
|
+ case MIN_EXPR:
|
|
+ case MAX_EXPR:
|
|
+ return false;
|
|
+
|
|
+ default:
|
|
+ return !flag_associative_math;
|
|
+ }
|
|
+
|
|
+ if (INTEGRAL_TYPE_P (type))
|
|
+ {
|
|
+ if (!operation_no_trapping_overflow (type, code))
|
|
+ return true;
|
|
+ if (need_wrapping_integral_overflow
|
|
+ && !TYPE_OVERFLOW_WRAPS (type)
|
|
+ && operation_can_overflow (code))
|
|
+ return true;
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (SAT_FIXED_POINT_TYPE_P (type))
|
|
+ return true;
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
+
|
|
+/* Function parloops_is_simple_reduction
|
|
+
|
|
+ (1) Detect a cross-iteration def-use cycle that represents a simple
|
|
+ reduction computation. We look for the following pattern:
|
|
+
|
|
+ loop_header:
|
|
+ a1 = phi < a0, a2 >
|
|
+ a3 = ...
|
|
+ a2 = operation (a3, a1)
|
|
+
|
|
+ or
|
|
+
|
|
+ a3 = ...
|
|
+ loop_header:
|
|
+ a1 = phi < a0, a2 >
|
|
+ a2 = operation (a3, a1)
|
|
+
|
|
+ such that:
|
|
+ 1. operation is commutative and associative and it is safe to
|
|
+ change the order of the computation
|
|
+ 2. no uses for a2 in the loop (a2 is used out of the loop)
|
|
+ 3. no uses of a1 in the loop besides the reduction operation
|
|
+ 4. no uses of a1 outside the loop.
|
|
+
|
|
+ Conditions 1,4 are tested here.
|
|
+ Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
|
|
+
|
|
+ (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
|
|
+ nested cycles.
|
|
+
|
|
+ (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
|
|
+ reductions:
|
|
+
|
|
+ a1 = phi < a0, a2 >
|
|
+ inner loop (def of a3)
|
|
+ a2 = phi < a3 >
|
|
+
|
|
+ (4) Detect condition expressions, ie:
|
|
+ for (int i = 0; i < N; i++)
|
|
+ if (a[i] < val)
|
|
+ ret_val = a[i];
|
|
+
|
|
+*/
|
|
+
|
|
+static stmt_vec_info
|
|
+parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
+ bool *double_reduc,
|
|
+ bool need_wrapping_integral_overflow,
|
|
+ enum vect_reduction_type *v_reduc_type)
|
|
+{
|
|
+ gphi *phi = as_a <gphi *> (phi_info->stmt);
|
|
+ class loop *loop = (gimple_bb (phi))->loop_father;
|
|
+ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
|
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
|
|
+ gimple *phi_use_stmt = NULL;
|
|
+ enum tree_code orig_code, code;
|
|
+ tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
|
|
+ tree type;
|
|
+ tree name;
|
|
+ imm_use_iterator imm_iter;
|
|
+ use_operand_p use_p;
|
|
+ bool phi_def;
|
|
+
|
|
+ *double_reduc = false;
|
|
+ *v_reduc_type = TREE_CODE_REDUCTION;
|
|
+
|
|
+ tree phi_name = PHI_RESULT (phi);
|
|
+ /* ??? If there are no uses of the PHI result the inner loop reduction
|
|
+ won't be detected as possibly double-reduction by vectorizable_reduction
|
|
+ because that tries to walk the PHI arg from the preheader edge which
|
|
+ can be constant. See PR60382. */
|
|
+ if (has_zero_uses (phi_name))
|
|
+ return NULL;
|
|
+ unsigned nphi_def_loop_uses = 0;
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
|
|
+ {
|
|
+ gimple *use_stmt = USE_STMT (use_p);
|
|
+ if (is_gimple_debug (use_stmt))
|
|
+ continue;
|
|
+
|
|
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "intermediate value used outside loop.\n");
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ nphi_def_loop_uses++;
|
|
+ phi_use_stmt = use_stmt;
|
|
+ }
|
|
+
|
|
+ edge latch_e = loop_latch_edge (loop);
|
|
+ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
|
|
+ if (TREE_CODE (loop_arg) != SSA_NAME)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "reduction: not ssa_name: %T\n", loop_arg);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
|
|
+ if (!def_stmt_info
|
|
+ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
|
|
+ return NULL;
|
|
+
|
|
+ if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
|
|
+ {
|
|
+ name = gimple_assign_lhs (def_stmt);
|
|
+ phi_def = false;
|
|
+ }
|
|
+ else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
|
|
+ {
|
|
+ name = PHI_RESULT (def_stmt);
|
|
+ phi_def = true;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "reduction: unhandled reduction operation: %G",
|
|
+ def_stmt_info->stmt);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ unsigned nlatch_def_loop_uses = 0;
|
|
+ auto_vec<gphi *, 3> lcphis;
|
|
+ bool inner_loop_of_double_reduc = false;
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
|
|
+ {
|
|
+ gimple *use_stmt = USE_STMT (use_p);
|
|
+ if (is_gimple_debug (use_stmt))
|
|
+ continue;
|
|
+ if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
|
+ nlatch_def_loop_uses++;
|
|
+ else
|
|
+ {
|
|
+ /* We can have more than one loop-closed PHI. */
|
|
+ lcphis.safe_push (as_a <gphi *> (use_stmt));
|
|
+ if (nested_in_vect_loop
|
|
+ && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
|
|
+ == vect_double_reduction_def))
|
|
+ inner_loop_of_double_reduc = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* If this isn't a nested cycle or if the nested cycle reduction value
|
|
+ is used ouside of the inner loop we cannot handle uses of the reduction
|
|
+ value. */
|
|
+ if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
|
|
+ && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "reduction used in loop.\n");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* If DEF_STMT is a phi node itself, we expect it to have a single argument
|
|
+ defined in the inner loop. */
|
|
+ if (phi_def)
|
|
+ {
|
|
+ gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
|
|
+ op1 = PHI_ARG_DEF (def_stmt, 0);
|
|
+
|
|
+ if (gimple_phi_num_args (def_stmt) != 1
|
|
+ || TREE_CODE (op1) != SSA_NAME)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "unsupported phi node definition.\n");
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ gimple *def1 = SSA_NAME_DEF_STMT (op1);
|
|
+ if (gimple_bb (def1)
|
|
+ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
|
|
+ && loop->inner
|
|
+ && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
|
|
+ && is_gimple_assign (def1)
|
|
+ && is_a <gphi *> (phi_use_stmt)
|
|
+ && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt,
|
|
+ "detected double reduction: ");
|
|
+
|
|
+ *double_reduc = true;
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* If we are vectorizing an inner reduction we are executing that
|
|
+ in the original order only in case we are not dealing with a
|
|
+ double reduction. */
|
|
+ bool check_reduction = true;
|
|
+ if (flow_loop_nested_p (vect_loop, loop))
|
|
+ {
|
|
+ gphi *lcphi;
|
|
+ unsigned i;
|
|
+ check_reduction = false;
|
|
+ FOR_EACH_VEC_ELT (lcphis, i, lcphi)
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
|
|
+ {
|
|
+ gimple *use_stmt = USE_STMT (use_p);
|
|
+ if (is_gimple_debug (use_stmt))
|
|
+ continue;
|
|
+ if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
|
|
+ check_reduction = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
|
|
+ code = orig_code = gimple_assign_rhs_code (def_stmt);
|
|
+
|
|
+ if (nested_in_vect_loop && !check_reduction)
|
|
+ {
|
|
+ /* FIXME: Even for non-reductions code generation is funneled
|
|
+ through vectorizable_reduction for the stmt defining the
|
|
+ PHI latch value. So we have to artificially restrict ourselves
|
|
+ for the supported operations. */
|
|
+ switch (get_gimple_rhs_class (code))
|
|
+ {
|
|
+ case GIMPLE_BINARY_RHS:
|
|
+ case GIMPLE_TERNARY_RHS:
|
|
+ break;
|
|
+ default:
|
|
+ /* Not supported by vectorizable_reduction. */
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "nested cycle: not handled operation: ");
|
|
+ return NULL;
|
|
+ }
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
+ /* We can handle "res -= x[i]", which is non-associative by
|
|
+ simply rewriting this into "res += -x[i]". Avoid changing
|
|
+ gimple instruction for the first simple tests and only do this
|
|
+ if we're allowed to change code at all. */
|
|
+ if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
|
|
+ code = PLUS_EXPR;
|
|
+
|
|
+ if (code == COND_EXPR)
|
|
+ {
|
|
+ if (! nested_in_vect_loop)
|
|
+ *v_reduc_type = COND_REDUCTION;
|
|
+
|
|
+ op3 = gimple_assign_rhs1 (def_stmt);
|
|
+ if (COMPARISON_CLASS_P (op3))
|
|
+ {
|
|
+ op4 = TREE_OPERAND (op3, 1);
|
|
+ op3 = TREE_OPERAND (op3, 0);
|
|
+ }
|
|
+ if (op3 == phi_name || op4 == phi_name)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "reduction: condition depends on previous"
|
|
+ " iteration: ");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ op1 = gimple_assign_rhs2 (def_stmt);
|
|
+ op2 = gimple_assign_rhs3 (def_stmt);
|
|
+ }
|
|
+ else if (!commutative_tree_code (code) || !associative_tree_code (code))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "reduction: not commutative/associative: ");
|
|
+ return NULL;
|
|
+ }
|
|
+ else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
|
|
+ {
|
|
+ op1 = gimple_assign_rhs1 (def_stmt);
|
|
+ op2 = gimple_assign_rhs2 (def_stmt);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "reduction: not handled operation: ");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "reduction: both uses not ssa_names: ");
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ type = TREE_TYPE (gimple_assign_lhs (def_stmt));
|
|
+ if ((TREE_CODE (op1) == SSA_NAME
|
|
+ && !types_compatible_p (type,TREE_TYPE (op1)))
|
|
+ || (TREE_CODE (op2) == SSA_NAME
|
|
+ && !types_compatible_p (type, TREE_TYPE (op2)))
|
|
+ || (op3 && TREE_CODE (op3) == SSA_NAME
|
|
+ && !types_compatible_p (type, TREE_TYPE (op3)))
|
|
+ || (op4 && TREE_CODE (op4) == SSA_NAME
|
|
+ && !types_compatible_p (type, TREE_TYPE (op4))))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ {
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "reduction: multiple types: operation type: "
|
|
+ "%T, operands types: %T,%T",
|
|
+ type, TREE_TYPE (op1), TREE_TYPE (op2));
|
|
+ if (op3)
|
|
+ dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
|
|
+
|
|
+ if (op4)
|
|
+ dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
|
|
+ dump_printf (MSG_NOTE, "\n");
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* Check whether it's ok to change the order of the computation.
|
|
+ Generally, when vectorizing a reduction we change the order of the
|
|
+ computation. This may change the behavior of the program in some
|
|
+ cases, so we need to check that this is ok. One exception is when
|
|
+ vectorizing an outer-loop: the inner-loop is executed sequentially,
|
|
+ and therefore vectorizing reductions in the inner-loop during
|
|
+ outer-loop vectorization is safe. */
|
|
+ if (check_reduction
|
|
+ && *v_reduc_type == TREE_CODE_REDUCTION
|
|
+ && parloops_needs_fold_left_reduction_p (type, code,
|
|
+ need_wrapping_integral_overflow))
|
|
+ *v_reduc_type = FOLD_LEFT_REDUCTION;
|
|
+
|
|
+ /* Reduction is safe. We're dealing with one of the following:
|
|
+ 1) integer arithmetic and no trapv
|
|
+ 2) floating point arithmetic, and special flags permit this optimization
|
|
+ 3) nested cycle (i.e., outer loop vectorization). */
|
|
+ stmt_vec_info def1_info = loop_info->lookup_def (op1);
|
|
+ stmt_vec_info def2_info = loop_info->lookup_def (op2);
|
|
+ if (code != COND_EXPR && !def1_info && !def2_info)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt,
|
|
+ "reduction: no defs for operands: ");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* Check that one def is the reduction def, defined by PHI,
|
|
+ the other def is either defined in the loop ("vect_internal_def"),
|
|
+ or it's an induction (defined by a loop-header phi-node). */
|
|
+
|
|
+ if (def2_info
|
|
+ && def2_info->stmt == phi
|
|
+ && (code == COND_EXPR
|
|
+ || !def1_info
|
|
+ || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
|
|
+ || parloops_valid_reduction_input_p (def1_info)))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
+ if (def1_info
|
|
+ && def1_info->stmt == phi
|
|
+ && (code == COND_EXPR
|
|
+ || !def2_info
|
|
+ || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
|
|
+ || parloops_valid_reduction_input_p (def2_info)))
|
|
+ {
|
|
+ if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
|
|
+ {
|
|
+ /* Check if we can swap operands (just for simplicity - so that
|
|
+ the rest of the code can assume that the reduction variable
|
|
+ is always the last (second) argument). */
|
|
+ if (code == COND_EXPR)
|
|
+ {
|
|
+ /* Swap cond_expr by inverting the condition. */
|
|
+ tree cond_expr = gimple_assign_rhs1 (def_stmt);
|
|
+ enum tree_code invert_code = ERROR_MARK;
|
|
+ enum tree_code cond_code = TREE_CODE (cond_expr);
|
|
+
|
|
+ if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
|
|
+ {
|
|
+ bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
|
|
+ invert_code = invert_tree_comparison (cond_code, honor_nans);
|
|
+ }
|
|
+ if (invert_code != ERROR_MARK)
|
|
+ {
|
|
+ TREE_SET_CODE (cond_expr, invert_code);
|
|
+ swap_ssa_operands (def_stmt,
|
|
+ gimple_assign_rhs2_ptr (def_stmt),
|
|
+ gimple_assign_rhs3_ptr (def_stmt));
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt,
|
|
+ "detected reduction: cannot swap operands "
|
|
+ "for cond_expr");
|
|
+ return NULL;
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+ swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
|
|
+ gimple_assign_rhs2_ptr (def_stmt));
|
|
+
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt,
|
|
+ "detected reduction: need to swap operands: ");
|
|
+
|
|
+ if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
|
|
+ LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
|
|
+ }
|
|
+
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
+ /* Try to find SLP reduction chain. */
|
|
+ if (! nested_in_vect_loop
|
|
+ && code != COND_EXPR
|
|
+ && orig_code != MINUS_EXPR
|
|
+ && parloops_is_slp_reduction (loop_info, phi, def_stmt))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_ploop_op (MSG_NOTE, def_stmt,
|
|
+ "reduction: detected reduction chain: ");
|
|
+
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
+ /* Look for the expression computing loop_arg from loop PHI result. */
|
|
+ if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
|
|
+ return def_stmt_info;
|
|
+
|
|
+ if (dump_enabled_p ())
|
|
+ {
|
|
+ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
+ "reduction: unknown pattern: ");
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+/* Wrapper around vect_is_simple_reduction, which will modify code
|
|
+ in-place if it enables detection of more reductions. Arguments
|
|
+ as there. */
|
|
+
|
|
+stmt_vec_info
|
|
+parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
+ bool *double_reduc,
|
|
+ bool need_wrapping_integral_overflow)
|
|
+{
|
|
+ enum vect_reduction_type v_reduc_type;
|
|
+ stmt_vec_info def_info
|
|
+ = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
|
|
+ need_wrapping_integral_overflow,
|
|
+ &v_reduc_type);
|
|
+ if (def_info)
|
|
+ {
|
|
+ STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
|
|
+ STMT_VINFO_REDUC_DEF (phi_info) = def_info;
|
|
+ STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
|
|
+ STMT_VINFO_REDUC_DEF (def_info) = phi_info;
|
|
+ }
|
|
+ return def_info;
|
|
+}
|
|
+
|
|
/* Minimal number of iterations of a loop that should be executed in each
|
|
thread. */
|
|
#define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)
|
|
@@ -2614,9 +3326,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
|
|
continue;
|
|
|
|
stmt_vec_info reduc_stmt_info
|
|
- = vect_force_simple_reduction (simple_loop_info,
|
|
- simple_loop_info->lookup_stmt (phi),
|
|
- &double_reduc, true);
|
|
+ = parloops_force_simple_reduction (simple_loop_info,
|
|
+ simple_loop_info->lookup_stmt (phi),
|
|
+ &double_reduc, true);
|
|
if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
|
|
continue;
|
|
|
|
@@ -2663,9 +3375,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
|
|
stmt_vec_info inner_phi_info
|
|
= simple_loop_info->lookup_stmt (inner_phi);
|
|
stmt_vec_info inner_reduc_stmt_info
|
|
- = vect_force_simple_reduction (simple_loop_info,
|
|
- inner_phi_info,
|
|
- &double_reduc, true);
|
|
+ = parloops_force_simple_reduction (simple_loop_info,
|
|
+ inner_phi_info,
|
|
+ &double_reduc, true);
|
|
gcc_assert (!double_reduc);
|
|
if (!inner_reduc_stmt_info
|
|
|| !valid_reduction_p (inner_reduc_stmt_info))
|
|
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
|
|
index 01f095382d6..54e8adc8d7c 100644
|
|
--- a/gcc/tree-ssa-alias.c
|
|
+++ b/gcc/tree-ssa-alias.c
|
|
@@ -2535,13 +2535,36 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
|
|
case BUILT_IN_MEMSET_CHK:
|
|
case BUILT_IN_STRNCPY:
|
|
case BUILT_IN_STPNCPY:
|
|
+ case BUILT_IN_CALLOC:
|
|
{
|
|
/* For a must-alias check we need to be able to constrain
|
|
the access properly. */
|
|
if (!ref->max_size_known_p ())
|
|
return false;
|
|
- tree dest = gimple_call_arg (stmt, 0);
|
|
- tree len = gimple_call_arg (stmt, 2);
|
|
+ tree dest;
|
|
+ tree len;
|
|
+
|
|
+ /* In execution order a calloc call will never kill
|
|
+ anything. However, DSE will (ab)use this interface
|
|
+ to ask if a calloc call writes the same memory locations
|
|
+ as a later assignment, memset, etc. So handle calloc
|
|
+ in the expected way. */
|
|
+ if (DECL_FUNCTION_CODE (callee) == BUILT_IN_CALLOC)
|
|
+ {
|
|
+ tree arg0 = gimple_call_arg (stmt, 0);
|
|
+ tree arg1 = gimple_call_arg (stmt, 1);
|
|
+ if (TREE_CODE (arg0) != INTEGER_CST
|
|
+ || TREE_CODE (arg1) != INTEGER_CST)
|
|
+ return false;
|
|
+
|
|
+ dest = gimple_call_lhs (stmt);
|
|
+ len = fold_build2 (MULT_EXPR, TREE_TYPE (arg0), arg0, arg1);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ dest = gimple_call_arg (stmt, 0);
|
|
+ len = gimple_call_arg (stmt, 2);
|
|
+ }
|
|
if (!poly_int_tree_p (len))
|
|
return false;
|
|
tree rbase = ref->base;
|
|
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
|
|
index efe5b31cc0a..c20fbe048ed 100644
|
|
--- a/gcc/tree-ssa-dse.c
|
|
+++ b/gcc/tree-ssa-dse.c
|
|
@@ -1,4 +1,4 @@
|
|
-/* Dead store elimination
|
|
+/* Dead and redundant store elimination
|
|
Copyright (C) 2004-2019 Free Software Foundation, Inc.
|
|
|
|
This file is part of GCC.
|
|
@@ -36,17 +36,26 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "params.h"
|
|
#include "alias.h"
|
|
#include "tree-ssa-loop.h"
|
|
+#include "tree-ssa-dse.h"
|
|
|
|
/* This file implements dead store elimination.
|
|
|
|
A dead store is a store into a memory location which will later be
|
|
overwritten by another store without any intervening loads. In this
|
|
- case the earlier store can be deleted.
|
|
+ case the earlier store can be deleted or trimmed if the store
|
|
+ was partially dead.
|
|
+
|
|
+ A redundant store is a store into a memory location which stores
|
|
+ the exact same value as a prior store to the same memory location.
|
|
+ While this can often be handled by dead store elimination, removing
|
|
+ the redundant store is often better than removing or trimming the
|
|
+ dead store.
|
|
|
|
In our SSA + virtual operand world we use immediate uses of virtual
|
|
- operands to detect dead stores. If a store's virtual definition
|
|
+ operands to detect these cases. If a store's virtual definition
|
|
is used precisely once by a later store to the same location which
|
|
- post dominates the first store, then the first store is dead.
|
|
+ post dominates the first store, then the first store is dead. If
|
|
+ the data stored is the same, then the second store is redundant.
|
|
|
|
The single use of the store's virtual definition ensures that
|
|
there are no intervening aliased loads and the requirement that
|
|
@@ -58,7 +67,9 @@ along with GCC; see the file COPYING3. If not see
|
|
the point immediately before the later store. Again, the single
|
|
use of the virtual definition and the post-dominance relationship
|
|
ensure that such movement would be safe. Clearly if there are
|
|
- back to back stores, then the second is redundant.
|
|
+ back to back stores, then the second is makes the first dead. If
|
|
+ the second store stores the same value, then the second store is
|
|
+ redundant.
|
|
|
|
Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler"
|
|
may also help in understanding this code since it discusses the
|
|
@@ -66,19 +77,13 @@ along with GCC; see the file COPYING3. If not see
|
|
fact, they are the same transformation applied to different views of
|
|
the CFG. */
|
|
|
|
+void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *);
|
|
+static void delete_dead_or_redundant_call (gimple_stmt_iterator *, const char *);
|
|
|
|
/* Bitmap of blocks that have had EH statements cleaned. We should
|
|
remove their dead edges eventually. */
|
|
static bitmap need_eh_cleanup;
|
|
|
|
-/* Return value from dse_classify_store */
|
|
-enum dse_store_status
|
|
-{
|
|
- DSE_STORE_LIVE,
|
|
- DSE_STORE_MAYBE_PARTIAL_DEAD,
|
|
- DSE_STORE_DEAD
|
|
-};
|
|
-
|
|
/* STMT is a statement that may write into memory. Analyze it and
|
|
initialize WRITE to describe how STMT affects memory.
|
|
|
|
@@ -106,6 +111,25 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
|
|
ao_ref_init_from_ptr_and_size (write, ptr, size);
|
|
return true;
|
|
}
|
|
+
|
|
+ /* A calloc call can never be dead, but it can make
|
|
+ subsequent stores redundant if they store 0 into
|
|
+ the same memory locations. */
|
|
+ case BUILT_IN_CALLOC:
|
|
+ {
|
|
+ tree nelem = gimple_call_arg (stmt, 0);
|
|
+ tree selem = gimple_call_arg (stmt, 1);
|
|
+ if (TREE_CODE (nelem) == INTEGER_CST
|
|
+ && TREE_CODE (selem) == INTEGER_CST)
|
|
+ {
|
|
+ tree lhs = gimple_call_lhs (stmt);
|
|
+ tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
|
|
+ nelem, selem);
|
|
+ ao_ref_init_from_ptr_and_size (write, lhs, size);
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+
|
|
default:
|
|
break;
|
|
}
|
|
@@ -551,16 +575,84 @@ check_name (tree, tree *idx, void *data)
|
|
return true;
|
|
}
|
|
|
|
+/* STMT stores the value 0 into one or more memory locations
|
|
+ (via memset, empty constructor, calloc call, etc).
|
|
+
|
|
+ See if there is a subsequent store of the value 0 to one
|
|
+ or more of the same memory location(s). If so, the subsequent
|
|
+ store is redundant and can be removed.
|
|
+
|
|
+ The subsequent stores could be via memset, empty constructors,
|
|
+ simple MEM stores, etc. */
|
|
+
|
|
+static void
|
|
+dse_optimize_redundant_stores (gimple *stmt)
|
|
+{
|
|
+ int cnt = 0;
|
|
+
|
|
+ /* We could do something fairly complex and look through PHIs
|
|
+ like DSE_CLASSIFY_STORE, but it doesn't seem to be worth
|
|
+ the effort.
|
|
+
|
|
+ Look at all the immediate uses of the VDEF (which are obviously
|
|
+ dominated by STMT). See if one or more stores 0 into the same
|
|
+ memory locations a STMT, if so remove the immediate use statements. */
|
|
+ tree defvar = gimple_vdef (stmt);
|
|
+ imm_use_iterator ui;
|
|
+ gimple *use_stmt;
|
|
+ FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
|
|
+ {
|
|
+ /* Limit stmt walking. */
|
|
+ if (++cnt > PARAM_VALUE (PARAM_DSE_MAX_ALIAS_QUERIES_PER_STORE))
|
|
+ BREAK_FROM_IMM_USE_STMT (ui);
|
|
+
|
|
+ /* If USE_STMT stores 0 into one or more of the same locations
|
|
+ as STMT and STMT would kill USE_STMT, then we can just remove
|
|
+ USE_STMT. */
|
|
+ tree fndecl;
|
|
+ if ((is_gimple_assign (use_stmt)
|
|
+ && gimple_vdef (use_stmt)
|
|
+ && ((gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR
|
|
+ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (use_stmt)) == 0
|
|
+ && !gimple_clobber_p (stmt))
|
|
+ || (gimple_assign_rhs_code (use_stmt) == INTEGER_CST
|
|
+ && integer_zerop (gimple_assign_rhs1 (use_stmt)))))
|
|
+ || (gimple_call_builtin_p (use_stmt, BUILT_IN_NORMAL)
|
|
+ && (fndecl = gimple_call_fndecl (use_stmt)) != NULL
|
|
+ && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
|
|
+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
|
|
+ && integer_zerop (gimple_call_arg (use_stmt, 1))))
|
|
+ {
|
|
+ ao_ref write;
|
|
+
|
|
+ if (!initialize_ao_ref_for_dse (use_stmt, &write))
|
|
+ BREAK_FROM_IMM_USE_STMT (ui)
|
|
+
|
|
+ if (valid_ao_ref_for_dse (&write)
|
|
+ && stmt_kills_ref_p (stmt, &write))
|
|
+ {
|
|
+ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
|
|
+ if (is_gimple_assign (use_stmt))
|
|
+ delete_dead_or_redundant_assignment (&gsi, "redundant");
|
|
+ else if (is_gimple_call (use_stmt))
|
|
+ delete_dead_or_redundant_call (&gsi, "redundant");
|
|
+ else
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
/* A helper of dse_optimize_stmt.
|
|
Given a GIMPLE_ASSIGN in STMT that writes to REF, classify it
|
|
according to downstream uses and defs. Sets *BY_CLOBBER_P to true
|
|
if only clobber statements influenced the classification result.
|
|
Returns the classification. */
|
|
|
|
-static dse_store_status
|
|
+dse_store_status
|
|
dse_classify_store (ao_ref *ref, gimple *stmt,
|
|
bool byte_tracking_enabled, sbitmap live_bytes,
|
|
- bool *by_clobber_p = NULL)
|
|
+ bool *by_clobber_p, tree stop_at_vuse)
|
|
{
|
|
gimple *temp;
|
|
int cnt = 0;
|
|
@@ -596,6 +688,11 @@ dse_classify_store (ao_ref *ref, gimple *stmt,
|
|
}
|
|
else
|
|
defvar = gimple_vdef (temp);
|
|
+
|
|
+ /* If we're instructed to stop walking at region boundary, do so. */
|
|
+ if (defvar == stop_at_vuse)
|
|
+ return DSE_STORE_LIVE;
|
|
+
|
|
auto_vec<gimple *, 10> defs;
|
|
gimple *phi_def = NULL;
|
|
FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
|
|
@@ -763,12 +860,12 @@ private:
|
|
|
|
/* Delete a dead call at GSI, which is mem* call of some kind. */
|
|
static void
|
|
-delete_dead_call (gimple_stmt_iterator *gsi)
|
|
+delete_dead_or_redundant_call (gimple_stmt_iterator *gsi, const char *type)
|
|
{
|
|
gimple *stmt = gsi_stmt (*gsi);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
- fprintf (dump_file, " Deleted dead call: ");
|
|
+ fprintf (dump_file, " Deleted %s call: ", type);
|
|
print_gimple_stmt (dump_file, stmt, 0, dump_flags);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
@@ -796,13 +893,13 @@ delete_dead_call (gimple_stmt_iterator *gsi)
|
|
|
|
/* Delete a dead store at GSI, which is a gimple assignment. */
|
|
|
|
-static void
|
|
-delete_dead_assignment (gimple_stmt_iterator *gsi)
|
|
+void
|
|
+delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type)
|
|
{
|
|
gimple *stmt = gsi_stmt (*gsi);
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
|
{
|
|
- fprintf (dump_file, " Deleted dead store: ");
|
|
+ fprintf (dump_file, " Deleted %s store: ", type);
|
|
print_gimple_stmt (dump_file, stmt, 0, dump_flags);
|
|
fprintf (dump_file, "\n");
|
|
}
|
|
@@ -855,7 +952,8 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
|
|
some builtin calls. */
|
|
if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
|
|
{
|
|
- switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
|
|
+ tree fndecl = gimple_call_fndecl (stmt);
|
|
+ switch (DECL_FUNCTION_CODE (fndecl))
|
|
{
|
|
case BUILT_IN_MEMCPY:
|
|
case BUILT_IN_MEMMOVE:
|
|
@@ -867,10 +965,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
|
|
tree size = gimple_call_arg (stmt, 2);
|
|
if (integer_zerop (size))
|
|
{
|
|
- delete_dead_call (gsi);
|
|
+ delete_dead_or_redundant_call (gsi, "dead");
|
|
return;
|
|
}
|
|
|
|
+ /* If this is a memset call that initializes an object
|
|
+ to zero, it may be redundant with an earlier memset
|
|
+ or empty CONSTRUCTOR of a larger object. */
|
|
+ if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
|
|
+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
|
|
+ && integer_zerop (gimple_call_arg (stmt, 1)))
|
|
+ dse_optimize_redundant_stores (stmt);
|
|
+
|
|
enum dse_store_status store_status;
|
|
m_byte_tracking_enabled
|
|
= setup_live_bytes_from_ref (&ref, m_live_bytes);
|
|
@@ -887,10 +993,14 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
|
|
}
|
|
|
|
if (store_status == DSE_STORE_DEAD)
|
|
- delete_dead_call (gsi);
|
|
+ delete_dead_or_redundant_call (gsi, "dead");
|
|
return;
|
|
}
|
|
|
|
+ case BUILT_IN_CALLOC:
|
|
+ /* We already know the arguments are integer constants. */
|
|
+ dse_optimize_redundant_stores (stmt);
|
|
+
|
|
default:
|
|
return;
|
|
}
|
|
@@ -900,6 +1010,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
|
|
{
|
|
bool by_clobber_p = false;
|
|
|
|
+ /* First see if this store is a CONSTRUCTOR and if there
|
|
+ are subsequent CONSTRUCTOR stores which are totally
|
|
+ subsumed by this statement. If so remove the subsequent
|
|
+ CONSTRUCTOR store.
|
|
+
|
|
+ This will tend to make fewer calls into memset with longer
|
|
+ arguments. */
|
|
+ if (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
|
|
+ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt)) == 0
|
|
+ && !gimple_clobber_p (stmt))
|
|
+ dse_optimize_redundant_stores (stmt);
|
|
+
|
|
/* Self-assignments are zombies. */
|
|
if (operand_equal_p (gimple_assign_rhs1 (stmt),
|
|
gimple_assign_lhs (stmt), 0))
|
|
@@ -930,7 +1052,7 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
|
|
&& !by_clobber_p)
|
|
return;
|
|
|
|
- delete_dead_assignment (gsi);
|
|
+ delete_dead_or_redundant_assignment (gsi, "dead");
|
|
}
|
|
}
|
|
|
|
diff --git a/gcc/tree-ssa-dse.h b/gcc/tree-ssa-dse.h
|
|
new file mode 100644
|
|
index 00000000000..a5eccbd746d
|
|
--- /dev/null
|
|
+++ b/gcc/tree-ssa-dse.h
|
|
@@ -0,0 +1,36 @@
|
|
+/* Support routines for dead store elimination.
|
|
+ Copyright (C) 2019 Free Software Foundation, Inc.
|
|
+
|
|
+This file is part of GCC.
|
|
+
|
|
+GCC is free software; you can redistribute it and/or modify
|
|
+it under the terms of the GNU General Public License as published by
|
|
+the Free Software Foundation; either version 3, or (at your option)
|
|
+any later version.
|
|
+
|
|
+GCC is distributed in the hope that it will be useful,
|
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+GNU General Public License for more details.
|
|
+
|
|
+You should have received a copy of the GNU General Public License
|
|
+along with GCC; see the file COPYING3. If not see
|
|
+<http://www.gnu.org/licenses/>. */
|
|
+
|
|
+#ifndef GCC_TREE_SSA_DSE_H
|
|
+#define GCC_TREE_SSA_DSE_H
|
|
+
|
|
+/* Return value from dse_classify_store */
|
|
+enum dse_store_status
|
|
+{
|
|
+ DSE_STORE_LIVE,
|
|
+ DSE_STORE_MAYBE_PARTIAL_DEAD,
|
|
+ DSE_STORE_DEAD
|
|
+};
|
|
+
|
|
+dse_store_status dse_classify_store (ao_ref *, gimple *, bool, sbitmap,
|
|
+ bool * = NULL, tree = NULL);
|
|
+
|
|
+void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *);
|
|
+
|
|
+#endif /* GCC_TREE_SSA_DSE_H */
|
|
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
|
|
index d241becd481..2d54e13b180 100644
|
|
--- a/gcc/tree-ssa-loop-niter.c
|
|
+++ b/gcc/tree-ssa-loop-niter.c
|
|
@@ -1928,7 +1928,7 @@ number_of_iterations_cond (struct loop *loop,
|
|
|
|
tree
|
|
simplify_replace_tree (tree expr, tree old, tree new_tree,
|
|
- tree (*valueize) (tree))
|
|
+ tree (*valueize) (tree, void*), void *context)
|
|
{
|
|
unsigned i, n;
|
|
tree ret = NULL_TREE, e, se;
|
|
@@ -1944,7 +1944,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
|
|
{
|
|
if (TREE_CODE (expr) == SSA_NAME)
|
|
{
|
|
- new_tree = valueize (expr);
|
|
+ new_tree = valueize (expr, context);
|
|
if (new_tree != expr)
|
|
return new_tree;
|
|
}
|
|
@@ -1960,7 +1960,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
|
|
for (i = 0; i < n; i++)
|
|
{
|
|
e = TREE_OPERAND (expr, i);
|
|
- se = simplify_replace_tree (e, old, new_tree, valueize);
|
|
+ se = simplify_replace_tree (e, old, new_tree, valueize, context);
|
|
if (e == se)
|
|
continue;
|
|
|
|
diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
|
|
index dc116489218..fb192d2c250 100644
|
|
--- a/gcc/tree-ssa-loop-niter.h
|
|
+++ b/gcc/tree-ssa-loop-niter.h
|
|
@@ -53,7 +53,9 @@ extern bool scev_probably_wraps_p (tree, tree, tree, gimple *,
|
|
struct loop *, bool);
|
|
extern void free_numbers_of_iterations_estimates (struct loop *);
|
|
extern void free_numbers_of_iterations_estimates (function *);
|
|
-extern tree simplify_replace_tree (tree, tree, tree, tree (*)(tree) = NULL);
|
|
+extern tree simplify_replace_tree (tree, tree,
|
|
+ tree, tree (*)(tree, void *) = NULL,
|
|
+ void * = NULL);
|
|
extern void substitute_in_loop_info (struct loop *, tree, tree);
|
|
|
|
#endif /* GCC_TREE_SSA_LOOP_NITER_H */
|
|
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
|
|
index 00a09508836..551718637f1 100644
|
|
--- a/gcc/tree-ssa-loop.c
|
|
+++ b/gcc/tree-ssa-loop.c
|
|
@@ -768,9 +768,9 @@ get_lsm_tmp_name (tree ref, unsigned n, const char *suffix)
|
|
ns[1] = 0;
|
|
lsm_tmp_name_add (ns);
|
|
}
|
|
- return lsm_tmp_name;
|
|
if (suffix != NULL)
|
|
lsm_tmp_name_add (suffix);
|
|
+ return lsm_tmp_name;
|
|
}
|
|
|
|
/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. */
|
|
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
|
|
index 6794fbde29e..9c1a9a651fe 100644
|
|
--- a/gcc/tree-ssa-reassoc.c
|
|
+++ b/gcc/tree-ssa-reassoc.c
|
|
@@ -2039,9 +2039,6 @@ optimize_ops_list (enum tree_code opcode,
|
|
i++;
|
|
}
|
|
|
|
- length = ops->length ();
|
|
- oelast = ops->last ();
|
|
-
|
|
if (iterate)
|
|
optimize_ops_list (opcode, ops);
|
|
}
|
|
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
|
|
index 95fbead7b1e..cd5a3a75eaa 100644
|
|
--- a/gcc/tree-ssa-sccvn.c
|
|
+++ b/gcc/tree-ssa-sccvn.c
|
|
@@ -309,6 +309,10 @@ static vn_tables_t valid_info;
|
|
/* Valueization hook. Valueize NAME if it is an SSA name, otherwise
|
|
just return it. */
|
|
tree (*vn_valueize) (tree);
|
|
+tree vn_valueize_wrapper (tree t, void* context ATTRIBUTE_UNUSED)
|
|
+{
|
|
+ return vn_valueize (t);
|
|
+}
|
|
|
|
|
|
/* This represents the top of the VN lattice, which is the universal
|
|
@@ -6364,7 +6368,7 @@ process_bb (rpo_elim &avail, basic_block bb,
|
|
if (bb->loop_father->nb_iterations)
|
|
bb->loop_father->nb_iterations
|
|
= simplify_replace_tree (bb->loop_father->nb_iterations,
|
|
- NULL_TREE, NULL_TREE, vn_valueize);
|
|
+ NULL_TREE, NULL_TREE, &vn_valueize_wrapper);
|
|
}
|
|
|
|
/* Value-number all defs in the basic-block. */
|
|
diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c
|
|
index 2648b24f7d5..98b6caced03 100644
|
|
--- a/gcc/tree-ssa-sink.c
|
|
+++ b/gcc/tree-ssa-sink.c
|
|
@@ -433,7 +433,6 @@ statement_sink_location (gimple *stmt, basic_block frombb,
|
|
|
|
if (gimple_code (use) != GIMPLE_PHI)
|
|
{
|
|
- sinkbb = gimple_bb (use);
|
|
sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
|
|
|
|
if (sinkbb == frombb)
|
|
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
|
|
index c3ea2d680d8..91494d76176 100644
|
|
--- a/gcc/tree-ssa-threadedge.c
|
|
+++ b/gcc/tree-ssa-threadedge.c
|
|
@@ -1299,7 +1299,6 @@ thread_across_edge (gcond *dummy_cond,
|
|
|
|
x = new jump_thread_edge (taken_edge, EDGE_COPY_SRC_JOINER_BLOCK);
|
|
path->safe_push (x);
|
|
- found = false;
|
|
found = thread_around_empty_blocks (taken_edge,
|
|
dummy_cond,
|
|
avail_exprs_stack,
|
|
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
|
index 17a4fc8e279..e822ffc1a01 100644
|
|
--- a/gcc/tree-vect-data-refs.c
|
|
+++ b/gcc/tree-vect-data-refs.c
|
|
@@ -2863,10 +2863,12 @@ strip_conversion (tree op)
|
|
}
|
|
|
|
/* Return true if vectorizable_* routines can handle statements STMT1_INFO
|
|
- and STMT2_INFO being in a single group. */
|
|
+ and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
|
|
+ be grouped in SLP mode. */
|
|
|
|
static bool
|
|
-can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
|
|
+can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
|
|
+ bool allow_slp_p)
|
|
{
|
|
if (gimple_assign_single_p (stmt1_info->stmt))
|
|
return gimple_assign_single_p (stmt2_info->stmt);
|
|
@@ -2888,7 +2890,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
|
|
like those created by build_mask_conversion. */
|
|
tree mask1 = gimple_call_arg (call1, 2);
|
|
tree mask2 = gimple_call_arg (call2, 2);
|
|
- if (!operand_equal_p (mask1, mask2, 0))
|
|
+ if (!operand_equal_p (mask1, mask2, 0)
|
|
+ && (ifn == IFN_MASK_STORE || !allow_slp_p))
|
|
{
|
|
mask1 = strip_conversion (mask1);
|
|
if (!mask1)
|
|
@@ -2974,7 +2977,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
|
|
|| data_ref_compare_tree (DR_BASE_ADDRESS (dra),
|
|
DR_BASE_ADDRESS (drb)) != 0
|
|
|| data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
|
|
- || !can_group_stmts_p (stmtinfo_a, stmtinfo_b))
|
|
+ || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
|
|
break;
|
|
|
|
/* Check that the data-refs have the same constant size. */
|
|
@@ -3059,6 +3062,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
|
|
DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
|
|
lastinfo = stmtinfo_b;
|
|
|
|
+ STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
|
|
+ = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
|
|
+
|
|
+ if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "Load suitable for SLP vectorization only.\n");
|
|
+
|
|
if (init_b == init_prev
|
|
&& !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
|
|
&& dump_enabled_p ())
|
|
@@ -3446,7 +3456,6 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
|
/* First, we collect all data ref pairs for aliasing checks. */
|
|
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
|
|
{
|
|
- int comp_res;
|
|
poly_uint64 lower_bound;
|
|
tree segment_length_a, segment_length_b;
|
|
unsigned HOST_WIDE_INT access_size_a, access_size_b;
|
|
@@ -3478,10 +3487,13 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
|
dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
|
|
stmt_vec_info stmt_info_b = dr_info_b->stmt;
|
|
|
|
+ bool preserves_scalar_order_p
|
|
+ = vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
|
|
+
|
|
/* Skip the pair if inter-iteration dependencies are irrelevant
|
|
and intra-iteration dependencies are guaranteed to be honored. */
|
|
if (ignore_step_p
|
|
- && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
|
|
+ && (preserves_scalar_order_p
|
|
|| vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
|
|
&lower_bound)))
|
|
{
|
|
@@ -3562,14 +3574,11 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
|
align_a = vect_vfa_align (dr_info_a);
|
|
align_b = vect_vfa_align (dr_info_b);
|
|
|
|
- comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
|
|
- DR_BASE_ADDRESS (dr_info_b->dr));
|
|
- if (comp_res == 0)
|
|
- comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
|
|
- DR_OFFSET (dr_info_b->dr));
|
|
-
|
|
/* See whether the alias is known at compilation time. */
|
|
- if (comp_res == 0
|
|
+ if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
|
|
+ DR_BASE_ADDRESS (dr_info_b->dr), 0)
|
|
+ && operand_equal_p (DR_OFFSET (dr_info_a->dr),
|
|
+ DR_OFFSET (dr_info_b->dr), 0)
|
|
&& TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
|
|
&& TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
|
|
&& poly_int_tree_p (segment_length_a)
|
|
@@ -3602,15 +3611,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
|
stmt_info_b->stmt);
|
|
}
|
|
|
|
- dr_with_seg_len_pair_t dr_with_seg_len_pair
|
|
- (dr_with_seg_len (dr_info_a->dr, segment_length_a,
|
|
- access_size_a, align_a),
|
|
- dr_with_seg_len (dr_info_b->dr, segment_length_b,
|
|
- access_size_b, align_b));
|
|
+ dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
|
|
+ access_size_a, align_a);
|
|
+ dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
|
|
+ access_size_b, align_b);
|
|
+ /* Canonicalize the order to be the one that's needed for accurate
|
|
+ RAW, WAR and WAW flags, in cases where the data references are
|
|
+ well-ordered. The order doesn't really matter otherwise,
|
|
+ but we might as well be consistent. */
|
|
+ if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
|
|
+ std::swap (dr_a, dr_b);
|
|
|
|
- /* Canonicalize pairs by sorting the two DR members. */
|
|
- if (comp_res > 0)
|
|
- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
|
|
+ dr_with_seg_len_pair_t dr_with_seg_len_pair
|
|
+ (dr_a, dr_b, (preserves_scalar_order_p
|
|
+ ? dr_with_seg_len_pair_t::WELL_ORDERED
|
|
+ : dr_with_seg_len_pair_t::REORDERED));
|
|
|
|
comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
|
|
}
|
|
@@ -4123,7 +4138,7 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
|
|
*/
|
|
|
|
opt_result
|
|
-vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
|
|
+vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
|
|
{
|
|
struct loop *loop = NULL;
|
|
unsigned int i;
|
|
@@ -4298,7 +4313,7 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
|
|
/* Set vectype for STMT. */
|
|
scalar_type = TREE_TYPE (DR_REF (dr));
|
|
STMT_VINFO_VECTYPE (stmt_info)
|
|
- = get_vectype_for_scalar_type (scalar_type);
|
|
+ = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
if (!STMT_VINFO_VECTYPE (stmt_info))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -4344,13 +4359,18 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
|
|
if (!vect_check_gather_scatter (stmt_info,
|
|
as_a <loop_vec_info> (vinfo),
|
|
&gs_info)
|
|
- || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
|
|
- return opt_result::failure_at
|
|
- (stmt_info->stmt,
|
|
- (gatherscatter == GATHER) ?
|
|
- "not vectorized: not suitable for gather load %G" :
|
|
- "not vectorized: not suitable for scatter store %G",
|
|
- stmt_info->stmt);
|
|
+ || !get_vectype_for_scalar_type (vinfo,
|
|
+ TREE_TYPE (gs_info.offset)))
|
|
+ {
|
|
+ if (fatal)
|
|
+ *fatal = false;
|
|
+ return opt_result::failure_at
|
|
+ (stmt_info->stmt,
|
|
+ (gatherscatter == GATHER)
|
|
+ ? "not vectorized: not suitable for gather load %G"
|
|
+ : "not vectorized: not suitable for scatter store %G",
|
|
+ stmt_info->stmt);
|
|
+ }
|
|
STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
|
|
}
|
|
}
|
|
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
|
|
index ad1ea4e7b97..39bc2a82b37 100644
|
|
--- a/gcc/tree-vect-generic.c
|
|
+++ b/gcc/tree-vect-generic.c
|
|
@@ -694,7 +694,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
|
|
tree zero, cst, cond, mask_type;
|
|
gimple *stmt;
|
|
|
|
- mask_type = build_same_sized_truth_vector_type (type);
|
|
+ mask_type = truth_type_for (type);
|
|
zero = build_zero_cst (type);
|
|
cond = build2 (LT_EXPR, mask_type, op0, zero);
|
|
tree_vector_builder vec (type, nunits, 1);
|
|
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
|
|
index b3fae5ba4da..20ede85633b 100644
|
|
--- a/gcc/tree-vect-loop-manip.c
|
|
+++ b/gcc/tree-vect-loop-manip.c
|
|
@@ -47,6 +47,9 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "stor-layout.h"
|
|
#include "optabs-query.h"
|
|
#include "vec-perm-indices.h"
|
|
+#include "insn-config.h"
|
|
+#include "rtl.h"
|
|
+#include "recog.h"
|
|
|
|
/*************************************************************************
|
|
Simple Loop Peeling Utilities
|
|
@@ -323,13 +326,18 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
|
|
tree src_masktype = src_rgm->mask_type;
|
|
tree dest_masktype = dest_rgm->mask_type;
|
|
machine_mode src_mode = TYPE_MODE (src_masktype);
|
|
+ insn_code icode1, icode2;
|
|
if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
|
|
- && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
|
|
- && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
|
|
+ && (icode1 = optab_handler (vec_unpacku_hi_optab,
|
|
+ src_mode)) != CODE_FOR_nothing
|
|
+ && (icode2 = optab_handler (vec_unpacku_lo_optab,
|
|
+ src_mode)) != CODE_FOR_nothing)
|
|
{
|
|
/* Unpacking the source masks gives at least as many mask bits as
|
|
we need. We can then VIEW_CONVERT any excess bits away. */
|
|
- tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
|
|
+ machine_mode dest_mode = insn_data[icode1].operand[0].mode;
|
|
+ gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
|
|
+ tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
|
|
for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
|
|
{
|
|
tree src = src_rgm->masks[i / 2];
|
|
@@ -1745,7 +1753,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
|
|
Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
|
|
CODE and NITERS are as for vect_update_inits_of_dr. */
|
|
|
|
-static void
|
|
+void
|
|
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
|
|
tree_code code)
|
|
{
|
|
@@ -1755,21 +1763,12 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
|
|
|
|
DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
|
|
|
|
- /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
|
|
+ /* Adjust niters to sizetype. We used to insert the stmts on loop preheader
|
|
+ here, but since we might use these niters to update the epilogues niters
|
|
+ and data references we can't insert them here as this definition might not
|
|
+ always dominate its uses. */
|
|
if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
|
|
- {
|
|
- gimple_seq seq;
|
|
- edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
|
|
- tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
|
|
-
|
|
- niters = fold_convert (sizetype, niters);
|
|
- niters = force_gimple_operand (niters, &seq, false, var);
|
|
- if (seq)
|
|
- {
|
|
- basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
|
|
- gcc_assert (!new_bb);
|
|
- }
|
|
- }
|
|
+ niters = fold_convert (sizetype, niters);
|
|
|
|
FOR_EACH_VEC_ELT (datarefs, i, dr)
|
|
{
|
|
@@ -2032,6 +2031,29 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
|
|
*niters_vector_mult_vf_ptr = niters_vector_mult_vf;
|
|
}
|
|
|
|
+/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
|
|
+ this function searches for the corresponding lcssa phi node in exit
|
|
+ bb of LOOP. If it is found, return the phi result; otherwise return
|
|
+ NULL. */
|
|
+
|
|
+static tree
|
|
+find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
|
|
+ gphi *lcssa_phi)
|
|
+{
|
|
+ gphi_iterator gsi;
|
|
+ edge e = single_exit (loop);
|
|
+
|
|
+ gcc_assert (single_pred_p (e->dest));
|
|
+ for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
+ {
|
|
+ gphi *phi = gsi.phi ();
|
|
+ if (operand_equal_p (PHI_ARG_DEF (phi, 0),
|
|
+ PHI_ARG_DEF (lcssa_phi, 0), 0))
|
|
+ return PHI_RESULT (phi);
|
|
+ }
|
|
+ return NULL_TREE;
|
|
+}
|
|
+
|
|
/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
|
|
from SECOND/FIRST and puts it at the original loop's preheader/exit
|
|
edge, the two loops are arranged as below:
|
|
@@ -2119,6 +2141,29 @@ slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
|
|
incoming edge. */
|
|
adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
|
|
}
|
|
+
|
|
+ /* For epilogue peeling we have to make sure to copy all LC PHIs
|
|
+ for correct vectorization of live stmts. */
|
|
+ if (loop == first)
|
|
+ {
|
|
+ basic_block orig_exit = single_exit (second)->dest;
|
|
+ for (gsi_orig = gsi_start_phis (orig_exit);
|
|
+ !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
|
|
+ {
|
|
+ gphi *orig_phi = gsi_orig.phi ();
|
|
+ tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
|
|
+ if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg))
|
|
+ continue;
|
|
+
|
|
+ /* Already created in the above loop. */
|
|
+ if (find_guard_arg (first, second, orig_phi))
|
|
+ continue;
|
|
+
|
|
+ tree new_res = copy_ssa_name (orig_arg);
|
|
+ gphi *lcphi = create_phi_node (new_res, between_bb);
|
|
+ add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
/* Function slpeel_add_loop_guard adds guard skipping from the beginning
|
|
@@ -2203,29 +2248,6 @@ slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
|
|
}
|
|
}
|
|
|
|
-/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
|
|
- this function searches for the corresponding lcssa phi node in exit
|
|
- bb of LOOP. If it is found, return the phi result; otherwise return
|
|
- NULL. */
|
|
-
|
|
-static tree
|
|
-find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
|
|
- gphi *lcssa_phi)
|
|
-{
|
|
- gphi_iterator gsi;
|
|
- edge e = single_exit (loop);
|
|
-
|
|
- gcc_assert (single_pred_p (e->dest));
|
|
- for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
- {
|
|
- gphi *phi = gsi.phi ();
|
|
- if (operand_equal_p (PHI_ARG_DEF (phi, 0),
|
|
- PHI_ARG_DEF (lcssa_phi, 0), 0))
|
|
- return PHI_RESULT (phi);
|
|
- }
|
|
- return NULL_TREE;
|
|
-}
|
|
-
|
|
/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
|
|
from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
|
|
point between the two loops to the end of EPILOG. Edges GUARD_EDGE
|
|
@@ -2296,12 +2318,14 @@ slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
|
|
{
|
|
gphi *update_phi = gsi.phi ();
|
|
tree old_arg = PHI_ARG_DEF (update_phi, 0);
|
|
- /* This loop-closed-phi actually doesn't represent a use out of the
|
|
- loop - the phi arg is a constant. */
|
|
- if (TREE_CODE (old_arg) != SSA_NAME)
|
|
- continue;
|
|
|
|
- tree merge_arg = get_current_def (old_arg);
|
|
+ tree merge_arg = NULL_TREE;
|
|
+
|
|
+ /* If the old argument is a SSA_NAME use its current_def. */
|
|
+ if (TREE_CODE (old_arg) == SSA_NAME)
|
|
+ merge_arg = get_current_def (old_arg);
|
|
+ /* If it's a constant or doesn't have a current_def, just use the old
|
|
+ argument. */
|
|
if (!merge_arg)
|
|
merge_arg = old_arg;
|
|
|
|
@@ -2412,7 +2436,22 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
|
|
|
|
Note this function peels prolog and epilog only if it's necessary,
|
|
as well as guards.
|
|
- Returns created epilogue or NULL.
|
|
+ This function returns the epilogue loop if a decision was made to vectorize
|
|
+ it, otherwise NULL.
|
|
+
|
|
+ The analysis resulting in this epilogue loop's loop_vec_info was performed
|
|
+ in the same vect_analyze_loop call as the main loop's. At that time
|
|
+ vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
|
|
+ vectorization factors than the main loop. This list is stored in the main
|
|
+ loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to
|
|
+ vectorize the epilogue loop for a lower vectorization factor, the
|
|
+ loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
|
|
+ updated and linked to the epilogue loop. This is later used to vectorize
|
|
+ the epilogue. The reason the loop_vec_info needs updating is that it was
|
|
+ constructed based on the original main loop, and the epilogue loop is a
|
|
+ copy of this loop, so all links pointing to statements in the original loop
|
|
+ need updating. Furthermore, these loop_vec_infos share the
|
|
+ data_reference's records, which will also need to be updated.
|
|
|
|
TODO: Guard for prefer_scalar_loop should be emitted along with
|
|
versioning conditions if loop versioning is needed. */
|
|
@@ -2422,7 +2461,8 @@ struct loop *
|
|
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
tree *niters_vector, tree *step_vector,
|
|
tree *niters_vector_mult_vf_var, int th,
|
|
- bool check_profitability, bool niters_no_overflow)
|
|
+ bool check_profitability, bool niters_no_overflow,
|
|
+ tree *advance, drs_init_vec &orig_drs_init)
|
|
{
|
|
edge e, guard_e;
|
|
tree type = TREE_TYPE (niters), guard_cond;
|
|
@@ -2430,6 +2470,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
profile_probability prob_prolog, prob_vector, prob_epilog;
|
|
int estimated_vf;
|
|
int prolog_peeling = 0;
|
|
+ bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
|
|
/* We currently do not support prolog peeling if the target alignment is not
|
|
known at compile time. 'vect_gen_prolog_loop_niters' depends on the
|
|
target alignment being constant. */
|
|
@@ -2483,19 +2524,77 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
int bound_prolog = 0;
|
|
if (prolog_peeling)
|
|
niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
|
|
- &bound_prolog);
|
|
+ &bound_prolog);
|
|
else
|
|
niters_prolog = build_int_cst (type, 0);
|
|
|
|
+ loop_vec_info epilogue_vinfo = NULL;
|
|
+ if (vect_epilogues)
|
|
+ {
|
|
+ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
|
|
+ loop_vinfo->epilogue_vinfos.ordered_remove (0);
|
|
+ }
|
|
+
|
|
+ tree niters_vector_mult_vf = NULL_TREE;
|
|
+ /* Saving NITERs before the loop, as this may be changed by prologue. */
|
|
+ tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
|
|
+ edge update_e = NULL, skip_e = NULL;
|
|
+ unsigned int lowest_vf = constant_lower_bound (vf);
|
|
+ /* If we know the number of scalar iterations for the main loop we should
|
|
+ check whether after the main loop there are enough iterations left over
|
|
+ for the epilogue. */
|
|
+ if (vect_epilogues
|
|
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
+ && prolog_peeling >= 0
|
|
+ && known_eq (vf, lowest_vf))
|
|
+ {
|
|
+ unsigned HOST_WIDE_INT eiters
|
|
+ = (LOOP_VINFO_INT_NITERS (loop_vinfo)
|
|
+ - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
|
|
+
|
|
+ eiters -= prolog_peeling;
|
|
+ eiters
|
|
+ = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
|
|
+
|
|
+ unsigned int ratio;
|
|
+ unsigned int epilogue_gaps
|
|
+ = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
|
|
+ while (!(constant_multiple_p
|
|
+ (GET_MODE_SIZE (loop_vinfo->vector_mode),
|
|
+ GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio)
|
|
+ && eiters >= lowest_vf / ratio + epilogue_gaps))
|
|
+ {
|
|
+ delete epilogue_vinfo;
|
|
+ epilogue_vinfo = NULL;
|
|
+ if (loop_vinfo->epilogue_vinfos.length () == 0)
|
|
+ {
|
|
+ vect_epilogues = false;
|
|
+ break;
|
|
+ }
|
|
+ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
|
|
+ loop_vinfo->epilogue_vinfos.ordered_remove (0);
|
|
+ epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
|
|
+ }
|
|
+ }
|
|
/* Prolog loop may be skipped. */
|
|
bool skip_prolog = (prolog_peeling != 0);
|
|
- /* Skip to epilog if scalar loop may be preferred. It's only needed
|
|
- when we peel for epilog loop and when it hasn't been checked with
|
|
- loop versioning. */
|
|
+ /* Skip this loop to epilog when there are not enough iterations to enter this
|
|
+ vectorized loop. If true we should perform runtime checks on the NITERS
|
|
+ to check whether we should skip the current vectorized loop. If we know
|
|
+ the number of scalar iterations we may choose to add a runtime check if
|
|
+ this number "maybe" smaller than the number of iterations required
|
|
+ when we know the number of scalar iterations may potentially
|
|
+ be smaller than the number of iterations required to enter this loop, for
|
|
+ this we use the upper bounds on the prolog and epilog peeling. When we
|
|
+ don't know the number of iterations and don't require versioning it is
|
|
+ because we have asserted that there are enough scalar iterations to enter
|
|
+ the main loop, so this skip is not necessary. When we are versioning then
|
|
+ we only add such a skip if we have chosen to vectorize the epilogue. */
|
|
bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
|
|
bound_prolog + bound_epilog)
|
|
- : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
|
|
+ : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
|
|
+ || vect_epilogues));
|
|
/* Epilog loop must be executed if the number of iterations for epilog
|
|
loop is known at compile time, otherwise we need to add a check at
|
|
the end of vector loop and skip to the end of epilog loop. */
|
|
@@ -2525,6 +2624,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
|
|
dump_user_location_t loop_loc = find_loop_location (loop);
|
|
struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
|
|
+ if (vect_epilogues)
|
|
+ /* Make sure to set the epilogue's epilogue scalar loop, such that we can
|
|
+ use the original scalar loop as remaining epilogue if necessary. */
|
|
+ LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
|
|
+ = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
|
|
+
|
|
if (prolog_peeling)
|
|
{
|
|
e = loop_preheader_edge (loop);
|
|
@@ -2571,6 +2676,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
|
|
scale_loop_profile (prolog, prob_prolog, bound_prolog);
|
|
}
|
|
+
|
|
+ /* Save original inits for each data_reference before advancing them with
|
|
+ NITERS_PROLOG. */
|
|
+ unsigned int i;
|
|
+ struct data_reference *dr;
|
|
+ vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
|
|
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
|
|
+ orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr)));
|
|
+
|
|
/* Update init address of DRs. */
|
|
vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
|
|
/* Update niters for vector loop. */
|
|
@@ -2605,8 +2719,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
"loop can't be duplicated to exit edge.\n");
|
|
gcc_unreachable ();
|
|
}
|
|
- /* Peel epilog and put it on exit edge of loop. */
|
|
- epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
|
|
+ /* Peel epilog and put it on exit edge of loop. If we are vectorizing
|
|
+ said epilog then we should use a copy of the main loop as a starting
|
|
+ point. This loop may have already had some preliminary transformations
|
|
+ to allow for more optimal vectorization, for example if-conversion.
|
|
+ If we are not vectorizing the epilog then we should use the scalar loop
|
|
+ as the transformations mentioned above make less or no sense when not
|
|
+ vectorizing. */
|
|
+ epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
|
|
+ epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
|
|
if (!epilog)
|
|
{
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
|
|
@@ -2635,6 +2756,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
guard_to, guard_bb,
|
|
prob_vector.invert (),
|
|
irred_flag);
|
|
+ skip_e = guard_e;
|
|
e = EDGE_PRED (guard_to, 0);
|
|
e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
|
|
slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
|
|
@@ -2656,7 +2778,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
}
|
|
|
|
basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
|
|
- tree niters_vector_mult_vf;
|
|
/* If loop is peeled for non-zero constant times, now niters refers to
|
|
orig_niters - prolog_peeling, it won't overflow even the orig_niters
|
|
overflows. */
|
|
@@ -2679,7 +2800,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
/* Update IVs of original loop as if they were advanced by
|
|
niters_vector_mult_vf steps. */
|
|
gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
|
|
- edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
|
|
+ update_e = skip_vector ? e : loop_preheader_edge (epilog);
|
|
vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
|
|
update_e);
|
|
|
|
@@ -2720,10 +2841,75 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|
adjust_vec_debug_stmts ();
|
|
scev_reset ();
|
|
}
|
|
+
|
|
+ if (vect_epilogues)
|
|
+ {
|
|
+ epilog->aux = epilogue_vinfo;
|
|
+ LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
|
|
+
|
|
+ loop_constraint_clear (epilog, LOOP_C_INFINITE);
|
|
+
|
|
+ /* We now must calculate the number of NITERS performed by the previous
|
|
+ loop and EPILOGUE_NITERS to be performed by the epilogue. */
|
|
+ tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
|
|
+ niters_prolog, niters_vector_mult_vf);
|
|
+
|
|
+ /* If skip_vector we may skip the previous loop, we insert a phi-node to
|
|
+ determine whether we are coming from the previous vectorized loop
|
|
+ using the update_e edge or the skip_vector basic block using the
|
|
+ skip_e edge. */
|
|
+ if (skip_vector)
|
|
+ {
|
|
+ gcc_assert (update_e != NULL && skip_e != NULL);
|
|
+ gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
|
|
+ update_e->dest);
|
|
+ tree new_ssa = make_ssa_name (TREE_TYPE (niters));
|
|
+ gimple *stmt = gimple_build_assign (new_ssa, niters);
|
|
+ gimple_stmt_iterator gsi;
|
|
+ if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
|
|
+ && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
|
|
+ {
|
|
+ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
|
|
+ gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ gsi = gsi_last_bb (update_e->src);
|
|
+ gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
|
|
+ }
|
|
+
|
|
+ niters = new_ssa;
|
|
+ add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
|
|
+ add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
|
|
+ UNKNOWN_LOCATION);
|
|
+ niters = PHI_RESULT (new_phi);
|
|
+ }
|
|
+
|
|
+ /* Subtract the number of iterations performed by the vectorized loop
|
|
+ from the number of total iterations. */
|
|
+ tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
|
|
+ before_loop_niters,
|
|
+ niters);
|
|
+
|
|
+ LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
|
|
+ LOOP_VINFO_NITERSM1 (epilogue_vinfo)
|
|
+ = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
|
|
+ epilogue_niters,
|
|
+ build_one_cst (TREE_TYPE (epilogue_niters)));
|
|
+
|
|
+ /* Set ADVANCE to the number of iterations performed by the previous
|
|
+ loop and its prologue. */
|
|
+ *advance = niters;
|
|
+
|
|
+ /* Redo the peeling for niter analysis as the NITERs and alignment
|
|
+ may have been updated to take the main loop into account. */
|
|
+ determine_peel_for_niter (epilogue_vinfo);
|
|
+ }
|
|
+
|
|
adjust_vec.release ();
|
|
free_original_copy_tables ();
|
|
|
|
- return epilog;
|
|
+ return vect_epilogues ? epilog : NULL;
|
|
}
|
|
|
|
/* Function vect_create_cond_for_niters_checks.
|
|
@@ -2987,9 +3173,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
|
|
*COND_EXPR_STMT_LIST. */
|
|
|
|
struct loop *
|
|
-vect_loop_versioning (loop_vec_info loop_vinfo,
|
|
- unsigned int th, bool check_profitability,
|
|
- poly_uint64 versioning_threshold)
|
|
+vect_loop_versioning (loop_vec_info loop_vinfo)
|
|
{
|
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
|
|
struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
|
|
@@ -3009,10 +3193,15 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
|
|
bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
|
|
bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
|
|
bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
|
|
+ poly_uint64 versioning_threshold
|
|
+ = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
|
|
tree version_simd_if_cond
|
|
= LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
|
|
+ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
|
|
|
|
- if (check_profitability)
|
|
+ if (th >= vect_vf_for_cost (loop_vinfo)
|
|
+ && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
+ && !ordered_p (th, versioning_threshold))
|
|
cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
|
|
build_int_cst (TREE_TYPE (scalar_loop_iters),
|
|
th - 1));
|
|
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
|
index 0308b26b808..6cbdfd1ad1a 100644
|
|
--- a/gcc/tree-vect-loop.c
|
|
+++ b/gcc/tree-vect-loop.c
|
|
@@ -154,6 +154,8 @@ along with GCC; see the file COPYING3. If not see
|
|
*/
|
|
|
|
static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
|
|
+static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
|
|
+ bool *);
|
|
|
|
/* Subroutine of vect_determine_vf_for_stmt that handles only one
|
|
statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
|
|
@@ -325,7 +327,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|
"get vectype for scalar type: %T\n",
|
|
scalar_type);
|
|
|
|
- vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
|
|
if (!vectype)
|
|
return opt_result::failure_at (phi,
|
|
"not vectorized: unsupported "
|
|
@@ -559,19 +561,19 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
|
|
&& STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
|
|
|
|
stmt_vec_info reduc_stmt_info
|
|
- = vect_force_simple_reduction (loop_vinfo, stmt_vinfo,
|
|
- &double_reduc, false);
|
|
+ = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc);
|
|
if (reduc_stmt_info)
|
|
{
|
|
- if (double_reduc)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
|
|
+ STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo;
|
|
+ if (double_reduc)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Detected double reduction.\n");
|
|
|
|
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
|
|
- STMT_VINFO_DEF_TYPE (reduc_stmt_info)
|
|
- = vect_double_reduction_def;
|
|
+ STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
|
|
}
|
|
else
|
|
{
|
|
@@ -582,7 +584,6 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
|
|
"Detected vectorizable nested cycle.\n");
|
|
|
|
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
|
|
- STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_nested_cycle;
|
|
}
|
|
else
|
|
{
|
|
@@ -688,13 +689,16 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
|
|
stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
|
|
while (next)
|
|
{
|
|
- if (! STMT_VINFO_IN_PATTERN_P (next))
|
|
+ if (! STMT_VINFO_IN_PATTERN_P (next)
|
|
+ || STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1)
|
|
break;
|
|
next = REDUC_GROUP_NEXT_ELEMENT (next);
|
|
}
|
|
- /* If not all stmt in the chain are patterns try to handle
|
|
- the chain without patterns. */
|
|
- if (! next)
|
|
+ /* If not all stmt in the chain are patterns or if we failed
|
|
+ to update STMT_VINFO_REDUC_IDX try to handle the chain
|
|
+ without patterns. */
|
|
+ if (! next
|
|
+ && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1)
|
|
{
|
|
vect_fixup_reduc_chain (first);
|
|
LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
|
|
@@ -730,9 +734,7 @@ vect_get_loop_niters (struct loop *loop, tree *assumptions,
|
|
if (!exit)
|
|
return cond;
|
|
|
|
- niter = chrec_dont_know;
|
|
may_be_zero = NULL_TREE;
|
|
- niter_assumptions = boolean_true_node;
|
|
if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
|
|
|| chrec_contains_undetermined (niter_desc.niter))
|
|
return cond;
|
|
@@ -826,6 +828,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
|
|
ivexpr_map (NULL),
|
|
slp_unrolling_factor (1),
|
|
single_scalar_iteration_cost (0),
|
|
+ vec_outside_cost (0),
|
|
+ vec_inside_cost (0),
|
|
vectorizable (false),
|
|
can_fully_mask_p (true),
|
|
fully_masked_p (false),
|
|
@@ -885,6 +889,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
|
|
}
|
|
}
|
|
}
|
|
+
|
|
+ epilogue_vinfos.create (6);
|
|
}
|
|
|
|
/* Free all levels of MASKS. */
|
|
@@ -959,6 +965,7 @@ _loop_vec_info::~_loop_vec_info ()
|
|
|
|
release_vec_loop_masks (&masks);
|
|
delete ivexpr_map;
|
|
+ epilogue_vinfos.release ();
|
|
|
|
loop->aux = NULL;
|
|
}
|
|
@@ -1431,8 +1438,8 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Loop contains SLP and non-SLP stmts\n");
|
|
/* Both the vectorization factor and unroll factor have the form
|
|
- current_vector_size * X for some rational X, so they must have
|
|
- a common multiple. */
|
|
+ GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
|
|
+ so they must have a common multiple. */
|
|
vectorization_factor
|
|
= force_common_multiple (vectorization_factor,
|
|
LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
|
|
@@ -1535,12 +1542,18 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
|
phi_op = PHI_ARG_DEF (phi, 0);
|
|
stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
|
|
if (!op_def_info)
|
|
- return opt_result::failure_at (phi, "unsupported phi");
|
|
+ return opt_result::failure_at (phi, "unsupported phi\n");
|
|
|
|
if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
|
|
&& (STMT_VINFO_RELEVANT (op_def_info)
|
|
!= vect_used_in_outer_by_reduction))
|
|
- return opt_result::failure_at (phi, "unsupported phi");
|
|
+ return opt_result::failure_at (phi, "unsupported phi\n");
|
|
+
|
|
+ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
|
|
+ || (STMT_VINFO_DEF_TYPE (stmt_info)
|
|
+ == vect_double_reduction_def))
|
|
+ && !vectorizable_lc_phi (stmt_info, NULL, NULL))
|
|
+ return opt_result::failure_at (phi, "unsupported phi\n");
|
|
}
|
|
|
|
continue;
|
|
@@ -1564,18 +1577,19 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
|
ok = vectorizable_induction (stmt_info, NULL, NULL, NULL,
|
|
&cost_vec);
|
|
else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|
|
+ || (STMT_VINFO_DEF_TYPE (stmt_info)
|
|
+ == vect_double_reduction_def)
|
|
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
|
|
&& ! PURE_SLP_STMT (stmt_info))
|
|
- ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL,
|
|
- &cost_vec);
|
|
+ ok = vectorizable_reduction (stmt_info, NULL, NULL, &cost_vec);
|
|
}
|
|
|
|
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
|
|
if (ok
|
|
&& STMT_VINFO_LIVE_P (stmt_info)
|
|
&& !PURE_SLP_STMT (stmt_info))
|
|
- ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL,
|
|
- &cost_vec);
|
|
+ ok = vectorizable_live_operation (stmt_info, NULL, NULL, NULL,
|
|
+ -1, NULL, &cost_vec);
|
|
|
|
if (!ok)
|
|
return opt_result::failure_at (phi,
|
|
@@ -1692,9 +1706,20 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
return 0;
}

- HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
- if (estimated_niter == -1)
- estimated_niter = likely_max_stmt_executions_int (loop);
+ HOST_WIDE_INT estimated_niter;
+
+ /* If we are vectorizing an epilogue then we know the maximum number of
+ scalar iterations it will cover is at least one lower than the
+ vectorization factor of the main loop. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ estimated_niter
+ = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
+ else
+ {
+ estimated_niter = estimated_stmt_executions_int (loop);
+ if (estimated_niter == -1)
+ estimated_niter = likely_max_stmt_executions_int (loop);
+ }
if (estimated_niter != -1
&& ((unsigned HOST_WIDE_INT) estimated_niter
< MAX (th, (unsigned) min_profitable_estimate)))
@@ -1774,6 +1799,101 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
return opt_result::success ();
}

+/* Look for SLP-only access groups and turn each individual access into its own
+ group. */
+static void
+vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
+{
+ unsigned int i;
+ struct data_reference *dr;
+
+ DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
+
+ vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
+ {
+ gcc_assert (DR_REF (dr));
+ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
+
+ /* Check if the load is a part of an interleaving chain. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ {
+ stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ unsigned int group_size = DR_GROUP_SIZE (first_element);
+
+ /* Check if SLP-only groups. */
+ if (!STMT_SLP_TYPE (stmt_info)
+ && STMT_VINFO_SLP_VECT_ONLY (first_element))
+ {
+ /* Dissolve the group. */
+ STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
+
+ stmt_vec_info vinfo = first_element;
+ while (vinfo)
+ {
+ stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
+ DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
+ DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
+ DR_GROUP_SIZE (vinfo) = 1;
+ DR_GROUP_GAP (vinfo) = group_size - 1;
+ vinfo = next;
+ }
+ }
+ }
+ }
+}
+
+
+/* Decides whether we need to create an epilogue loop to handle
+ remaining scalar iterations and sets PEELING_FOR_NITERS accordingly. */
+
+void
+determine_peel_for_niter (loop_vec_info loop_vinfo)
+{
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+
+ unsigned HOST_WIDE_INT const_vf;
+ HOST_WIDE_INT max_niter
+ = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
+ if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
+ th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
+ (loop_vinfo));
+
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ /* The main loop handles all iterations. */
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
+ else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ /* Work out the (constant) number of iterations that need to be
+ peeled for reasons other than niters. */
+ unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ peel_niter += 1;
+ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ /* ??? When peeling for gaps but not alignment, we could
+ try to check whether the (variable) niters is known to be
+ VF * N + 1. That's something of a niche case though. */
+ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
+ || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned) exact_log2 (const_vf))
+ /* In case of versioning, check if the maximum number of
+ iterations is greater than th. If they are identical,
+ the epilogue is unnecessary. */
+ && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
+ || ((unsigned HOST_WIDE_INT) max_niter
+ > (th / const_vf) * const_vf))))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+}
+
+
/* Function vect_analyze_loop_2.

Apply a set of analyses on LOOP, and create a loop_vec_info struct
@@ -1786,6 +1906,15 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
int res;
unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
poly_uint64 min_vf = 2;
+ loop_vec_info orig_loop_vinfo = NULL;
+
+ /* If we are dealing with an epilogue then orig_loop_vinfo points to the
+ loop_vec_info of the first vectorized loop. */
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ else
+ orig_loop_vinfo = loop_vinfo;
+ gcc_assert (orig_loop_vinfo);

/* The first group of checks is independent of the vector size. */
fatal = true;
@@ -1824,7 +1953,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
/* Analyze the data references and also adjust the minimal
vectorization factor according to the loads and stores. */

- ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
+ ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
if (!ok)
{
if (dump_enabled_p ())
@@ -1855,7 +1984,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)

/* Data-flow analysis to detect stmts that do not need to be vectorized. */

- ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
+ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
if (!ok)
{
if (dump_enabled_p ())
@@ -1901,7 +2030,6 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
|
|
vect_compute_single_scalar_iteration_cost (loop_vinfo);
|
|
|
|
poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
|
- unsigned th;
|
|
|
|
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
|
|
ok = vect_analyze_slp (loop_vinfo, *n_stmts);
|
|
@@ -1941,9 +2069,6 @@ start_over:
|
|
LOOP_VINFO_INT_NITERS (loop_vinfo));
|
|
}
|
|
|
|
- HOST_WIDE_INT max_niter
|
|
- = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
|
|
-
|
|
/* Analyze the alignment of the data-refs in the loop.
|
|
Fail if a data reference is found that cannot be vectorized. */
|
|
|
|
@@ -1990,6 +2115,9 @@ start_over:
|
|
}
|
|
}
|
|
|
|
+ /* Dissolve SLP-only groups. */
|
|
+ vect_dissolve_slp_only_groups (loop_vinfo);
|
|
+
|
|
/* Scan all the remaining operations in the loop that are not subject
|
|
to SLP and make sure they are vectorizable. */
|
|
ok = vect_analyze_loop_operations (loop_vinfo);
|
|
@@ -2032,6 +2160,16 @@ start_over:
|
|
" support peeling for gaps.\n");
|
|
}
|
|
|
|
+ /* If we're vectorizing an epilogue loop, we either need a fully-masked
|
|
+ loop or a loop that has a lower VF than the main loop. */
|
|
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
|
|
+ && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
|
+ && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
|
|
+ LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
|
|
+ return opt_result::failure_at (vect_location,
|
|
+ "Vectorization factor too high for"
|
|
+ " epilogue loop.\n");
|
|
+
|
|
/* Check the costings of the loop make vectorizing worthwhile. */
|
|
res = vect_analyze_loop_costing (loop_vinfo);
|
|
if (res < 0)
|
|
@@ -2044,42 +2182,7 @@ start_over:
|
|
return opt_result::failure_at (vect_location,
|
|
"Loop costings not worthwhile.\n");
|
|
|
|
- /* Decide whether we need to create an epilogue loop to handle
|
|
- remaining scalar iterations. */
|
|
- th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
|
|
-
|
|
- unsigned HOST_WIDE_INT const_vf;
|
|
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
|
|
- /* The main loop handles all iterations. */
|
|
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
|
|
- else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
|
|
- {
|
|
- /* Work out the (constant) number of iterations that need to be
|
|
- peeled for reasons other than niters. */
|
|
- unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
|
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
|
- peel_niter += 1;
|
|
- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
|
|
- LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
|
|
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
|
|
- }
|
|
- else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
|
|
- /* ??? When peeling for gaps but not alignment, we could
|
|
- try to check whether the (variable) niters is known to be
|
|
- VF * N + 1. That's something of a niche case though. */
|
|
- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
|
- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
|
|
- || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
|
- < (unsigned) exact_log2 (const_vf))
|
|
- /* In case of versioning, check if the maximum number of
|
|
- iterations is greater than th. If they are identical,
|
|
- the epilogue is unnecessary. */
|
|
- && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
|
|
- || ((unsigned HOST_WIDE_INT) max_niter
|
|
- > (th / const_vf) * const_vf))))
|
|
- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
|
|
-
|
|
+ determine_peel_for_niter (loop_vinfo);
|
|
/* If an epilogue loop is required make sure we can create one. */
|
|
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
|
|| LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
|
|
@@ -2101,10 +2204,21 @@ start_over:
|
|
/* During peeling, we need to check if number of loop iterations is
|
|
enough for both peeled prolog loop and vector loop. This check
|
|
can be merged along with threshold check of loop versioning, so
|
|
- increase threshold for this case if necessary. */
|
|
- if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
|
|
+ increase threshold for this case if necessary.
|
|
+
|
|
+ If we are analyzing an epilogue we still want to check what its
|
|
+ versioning threshold would be. If we decide to vectorize the epilogues we
|
|
+ will want to use the lowest versioning threshold of all epilogues and main
|
|
+ loop. This will enable us to enter a vectorized epilogue even when
|
|
+ versioning the loop. We can't simply check whether the epilogue requires
|
|
+ versioning though since we may have skipped some versioning checks when
|
|
+ analyzing the epilogue. For instance, checks for alias versioning will be
|
|
+ skipped when dealing with epilogues as we assume we already checked them
|
|
+ for the main loop. So instead we always check the 'orig_loop_vinfo'. */
|
|
+ if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo))
|
|
{
|
|
poly_uint64 niters_th = 0;
|
|
+ unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
|
|
|
|
if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
|
|
{
|
|
@@ -2125,6 +2239,14 @@ start_over:
|
|
/* One additional iteration because of peeling for gap. */
|
|
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
|
niters_th += 1;
|
|
+
|
|
+ /* Use the same condition as vect_transform_loop to decide when to use
|
|
+ the cost to determine a versioning threshold. */
|
|
+ if (th >= vect_vf_for_cost (loop_vinfo)
|
|
+ && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
+ && ordered_p (th, niters_th))
|
|
+ niters_th = ordered_max (poly_uint64 (th), niters_th);
|
|
+
|
|
LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
|
|
}
|
|
|
|
@@ -2240,22 +2362,95 @@ again:
|
|
goto start_over;
|
|
}
|
|
|
|
+/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
|
|
+ to be better than vectorizing it using OLD_LOOP_VINFO. Assume that
|
|
+ OLD_LOOP_VINFO is better unless something specifically indicates
|
|
+ otherwise.
|
|
+
|
|
+ Note that this deliberately isn't a partial order. */
|
|
+
|
|
+static bool
|
|
+vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
|
|
+ loop_vec_info old_loop_vinfo)
|
|
+{
|
|
+ struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo);
|
|
+ gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop);
|
|
+
|
|
+ poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo);
|
|
+ poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo);
|
|
+
|
|
+ /* Always prefer a VF of loop->simdlen over any other VF. */
|
|
+ if (loop->simdlen)
|
|
+ {
|
|
+ bool new_simdlen_p = known_eq (new_vf, loop->simdlen);
|
|
+ bool old_simdlen_p = known_eq (old_vf, loop->simdlen);
|
|
+ if (new_simdlen_p != old_simdlen_p)
|
|
+ return new_simdlen_p;
|
|
+ }
|
|
+
|
|
+ /* Limit the VFs to what is likely to be the maximum number of iterations,
|
|
+ to handle cases in which at least one loop_vinfo is fully-masked. */
|
|
+ HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
|
|
+ if (estimated_max_niter != -1)
|
|
+ {
|
|
+ if (known_le (estimated_max_niter, new_vf))
|
|
+ new_vf = estimated_max_niter;
|
|
+ if (known_le (estimated_max_niter, old_vf))
|
|
+ old_vf = estimated_max_niter;
|
|
+ }
|
|
+
|
|
+ /* Check whether the (fractional) cost per scalar iteration is lower
|
|
+ or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */
|
|
+ poly_widest_int rel_new = (new_loop_vinfo->vec_inside_cost
|
|
+ * poly_widest_int (old_vf));
|
|
+ poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost
|
|
+ * poly_widest_int (new_vf));
|
|
+ if (maybe_lt (rel_old, rel_new))
|
|
+ return false;
|
|
+ if (known_lt (rel_new, rel_old))
|
|
+ return true;
|
|
+
|
|
+ /* If there's nothing to choose between the loop bodies, see whether
|
|
+ there's a difference in the prologue and epilogue costs. */
|
|
+ if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost)
|
|
+ return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost;
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return
|
|
+ true if we should. */
|
|
+
|
|
+static bool
|
|
+vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
|
|
+ loop_vec_info old_loop_vinfo)
|
|
+{
|
|
+ if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo))
|
|
+ return false;
|
|
+
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Preferring vector mode %s to vector mode %s\n",
|
|
+ GET_MODE_NAME (new_loop_vinfo->vector_mode),
|
|
+ GET_MODE_NAME (old_loop_vinfo->vector_mode));
|
|
+ return true;
|
|
+}
|
|
+
|
|
/* Function vect_analyze_loop.
|
|
|
|
Apply a set of analyses on LOOP, and create a loop_vec_info struct
|
|
for it. The different analyses will record information in the
|
|
- loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
|
|
- be vectorized. */
|
|
+ loop_vec_info struct. */
|
|
opt_loop_vec_info
|
|
-vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|
- vec_info_shared *shared)
|
|
+vect_analyze_loop (struct loop *loop, vec_info_shared *shared)
|
|
{
|
|
- auto_vector_sizes vector_sizes;
|
|
+ auto_vector_modes vector_modes;
|
|
|
|
/* Autodetect first vector size we try. */
|
|
- current_vector_size = 0;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
|
- unsigned int next_size = 0;
|
|
+ unsigned int autovec_flags
|
|
+ = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
|
|
+ loop->simdlen != 0);
|
|
+ unsigned int mode_i = 0;
|
|
|
|
DUMP_VECT_SCOPE ("analyze_loop_nest");
|
|
|
|
@@ -2272,58 +2467,221 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
|
|
" loops cannot be vectorized\n");
|
|
|
|
unsigned n_stmts = 0;
|
|
- poly_uint64 autodetected_vector_size = 0;
|
|
+ machine_mode autodetected_vector_mode = VOIDmode;
|
|
+ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
|
|
+ machine_mode next_vector_mode = VOIDmode;
|
|
+ poly_uint64 lowest_th = 0;
|
|
+ unsigned vectorized_loops = 0;
|
|
+ bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
|
|
+ && !unlimited_cost_model (loop));
|
|
+
|
|
+ bool vect_epilogues = false;
|
|
+ opt_result res = opt_result::success ();
|
|
+ unsigned HOST_WIDE_INT simdlen = loop->simdlen;
|
|
while (1)
|
|
{
|
|
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
|
|
- opt_loop_vec_info loop_vinfo
|
|
- = vect_analyze_loop_form (loop, shared);
|
|
+ opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
|
|
if (!loop_vinfo)
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"bad loop form.\n");
|
|
+ gcc_checking_assert (first_loop_vinfo == NULL);
|
|
return loop_vinfo;
|
|
}
|
|
+ loop_vinfo->vector_mode = next_vector_mode;
|
|
|
|
bool fatal = false;
|
|
|
|
- if (orig_loop_vinfo)
|
|
- LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
|
|
+ /* When pick_lowest_cost_p is true, we should in principle iterate
|
|
+ over all the loop_vec_infos that LOOP_VINFO could replace and
|
|
+ try to vectorize LOOP_VINFO under the same conditions.
|
|
+ E.g. when trying to replace an epilogue loop, we should vectorize
|
|
+ LOOP_VINFO as an epilogue loop with the same VF limit. When trying
|
|
+ to replace the main loop, we should vectorize LOOP_VINFO as a main
|
|
+ loop too.
|
|
+
|
|
+ However, autovectorize_vector_modes is usually sorted as follows:
|
|
+
|
|
+ - Modes that naturally produce lower VFs usually follow modes that
|
|
+ naturally produce higher VFs.
|
|
+
|
|
+ - When modes naturally produce the same VF, maskable modes
|
|
+ usually follow unmaskable ones, so that the maskable mode
|
|
+ can be used to vectorize the epilogue of the unmaskable mode.
|
|
+
|
|
+ This order is preferred because it leads to the maximum
|
|
+ epilogue vectorization opportunities. Targets should only use
|
|
+ a different order if they want to make wide modes available while
|
|
+ disparaging them relative to earlier, smaller modes. The assumption
|
|
+ in that case is that the wider modes are more expensive in some
|
|
+ way that isn't reflected directly in the costs.
|
|
+
|
|
+ There should therefore be few interesting cases in which
|
|
+ LOOP_VINFO fails when treated as an epilogue loop, succeeds when
|
|
+ treated as a standalone loop, and ends up being genuinely cheaper
|
|
+ than FIRST_LOOP_VINFO. */
|
|
+ if (vect_epilogues)
|
|
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
|
|
+
|
|
+ res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
|
|
+ if (mode_i == 0)
|
|
+ autodetected_vector_mode = loop_vinfo->vector_mode;
|
|
+ if (dump_enabled_p ())
|
|
+ {
|
|
+ if (res)
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Analysis succeeded with vector mode %s\n",
|
|
+ GET_MODE_NAME (loop_vinfo->vector_mode));
|
|
+ else
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Analysis failed with vector mode %s\n",
|
|
+ GET_MODE_NAME (loop_vinfo->vector_mode));
|
|
+ }
|
|
+
|
|
+ loop->aux = NULL;
|
|
+
|
|
+ if (!fatal)
|
|
+ while (mode_i < vector_modes.length ()
|
|
+ && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i]))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** The result for vector mode %s would"
|
|
+ " be the same\n",
|
|
+ GET_MODE_NAME (vector_modes[mode_i]));
|
|
+ mode_i += 1;
|
|
+ }
|
|
|
|
- opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
|
|
if (res)
|
|
{
|
|
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
|
|
+ vectorized_loops++;
|
|
|
|
- return loop_vinfo;
|
|
- }
|
|
-
|
|
- delete loop_vinfo;
|
|
+ /* Once we hit the desired simdlen for the first time,
|
|
+ discard any previous attempts. */
|
|
+ if (simdlen
|
|
+ && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
|
|
+ {
|
|
+ delete first_loop_vinfo;
|
|
+ first_loop_vinfo = opt_loop_vec_info::success (NULL);
|
|
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
|
|
+ simdlen = 0;
|
|
+ }
|
|
+ else if (pick_lowest_cost_p && first_loop_vinfo)
|
|
+ {
|
|
+ /* Keep trying to roll back vectorization attempts while the
|
|
+ loop_vec_infos they produced were worse than this one. */
|
|
+ vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
|
|
+ while (!vinfos.is_empty ()
|
|
+ && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
|
|
+ {
|
|
+ gcc_assert (vect_epilogues);
|
|
+ delete vinfos.pop ();
|
|
+ }
|
|
+ if (vinfos.is_empty ()
|
|
+ && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
|
|
+ {
|
|
+ delete first_loop_vinfo;
|
|
+ first_loop_vinfo = opt_loop_vec_info::success (NULL);
|
|
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
|
|
+ }
|
|
+ }
|
|
|
|
- if (next_size == 0)
|
|
- autodetected_vector_size = current_vector_size;
|
|
+ if (first_loop_vinfo == NULL)
|
|
+ {
|
|
+ first_loop_vinfo = loop_vinfo;
|
|
+ lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
|
|
+ }
|
|
+ else if (vect_epilogues
|
|
+ /* For now only allow one epilogue loop. */
|
|
+ && first_loop_vinfo->epilogue_vinfos.is_empty ())
|
|
+ {
|
|
+ first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
|
|
+ poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
|
|
+ gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
|
|
+ || maybe_ne (lowest_th, 0U));
|
|
+ /* Keep track of the known smallest versioning
|
|
+ threshold. */
|
|
+ if (ordered_p (lowest_th, th))
|
|
+ lowest_th = ordered_min (lowest_th, th);
|
|
+ }
|
|
+ else
|
|
+ delete loop_vinfo;
|
|
+
|
|
+ /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
|
|
+ enabled, SIMDUID is not set, it is the innermost loop and we have
|
|
+ either already found the loop's SIMDLEN or there was no SIMDLEN to
|
|
+ begin with.
|
|
+ TODO: Enable epilogue vectorization for loops with SIMDUID set. */
|
|
+ vect_epilogues = (!simdlen
|
|
+ && loop->inner == NULL
|
|
+ && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)
|
|
+ && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
|
|
+ && !loop->simduid
|
|
+ /* For now only allow one epilogue loop, but allow
|
|
+ pick_lowest_cost_p to replace it. */
|
|
+ && (first_loop_vinfo->epilogue_vinfos.is_empty ()
|
|
+ || pick_lowest_cost_p));
|
|
+
|
|
+ /* Commit to first_loop_vinfo if we have no reason to try
|
|
+ alternatives. */
|
|
+ if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
|
|
+ break;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ delete loop_vinfo;
|
|
+ if (fatal)
|
|
+ {
|
|
+ gcc_checking_assert (first_loop_vinfo == NULL);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
|
|
- if (next_size < vector_sizes.length ()
|
|
- && known_eq (vector_sizes[next_size], autodetected_vector_size))
|
|
- next_size += 1;
|
|
+ if (mode_i < vector_modes.length ()
|
|
+ && VECTOR_MODE_P (autodetected_vector_mode)
|
|
+ && (related_vector_mode (vector_modes[mode_i],
|
|
+ GET_MODE_INNER (autodetected_vector_mode))
|
|
+ == autodetected_vector_mode)
|
|
+ && (related_vector_mode (autodetected_vector_mode,
|
|
+ GET_MODE_INNER (vector_modes[mode_i]))
|
|
+ == vector_modes[mode_i]))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Skipping vector mode %s, which would"
|
|
+ " repeat the analysis for %s\n",
|
|
+ GET_MODE_NAME (vector_modes[mode_i]),
|
|
+ GET_MODE_NAME (autodetected_vector_mode));
|
|
+ mode_i += 1;
|
|
+ }
|
|
|
|
- if (fatal
|
|
- || next_size == vector_sizes.length ()
|
|
- || known_eq (current_vector_size, 0U))
|
|
- return opt_loop_vec_info::propagate_failure (res);
|
|
+ if (mode_i == vector_modes.length ()
|
|
+ || autodetected_vector_mode == VOIDmode)
|
|
+ break;
|
|
|
|
/* Try the next biggest vector size. */
|
|
- current_vector_size = vector_sizes[next_size++];
|
|
+ next_vector_mode = vector_modes[mode_i++];
|
|
if (dump_enabled_p ())
|
|
- {
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "***** Re-trying analysis with "
|
|
- "vector size ");
|
|
- dump_dec (MSG_NOTE, current_vector_size);
|
|
- dump_printf (MSG_NOTE, "\n");
|
|
- }
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Re-trying analysis with vector mode %s\n",
|
|
+ GET_MODE_NAME (next_vector_mode));
|
|
+ }
|
|
+
|
|
+ if (first_loop_vinfo)
|
|
+ {
|
|
+ loop->aux = (loop_vec_info) first_loop_vinfo;
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Choosing vector mode %s\n",
|
|
+ GET_MODE_NAME (first_loop_vinfo->vector_mode));
|
|
+ LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
|
|
+ return first_loop_vinfo;
|
|
}
|
|
+
|
|
+ return opt_loop_vec_info::propagate_failure (res);
|
|
}
|
|
|
|
/* Return true if there is an in-order reduction function for CODE, storing
|
|
@@ -2397,17 +2755,17 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
|
|
|
|
/* If there is a neutral value X such that SLP reduction NODE would not
|
|
be affected by the introduction of additional X elements, return that X,
|
|
- otherwise return null. CODE is the code of the reduction. REDUC_CHAIN
|
|
- is true if the SLP statements perform a single reduction, false if each
|
|
- statement performs an independent reduction. */
|
|
+ otherwise return null. CODE is the code of the reduction and VECTOR_TYPE
|
|
+ is the vector type that would hold element X. REDUC_CHAIN is true if
|
|
+ the SLP statements perform a single reduction, false if each statement
|
|
+ performs an independent reduction. */
|
|
|
|
static tree
|
|
-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
|
|
- bool reduc_chain)
|
|
+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
|
|
+ tree_code code, bool reduc_chain)
|
|
{
|
|
vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
|
|
stmt_vec_info stmt_vinfo = stmts[0];
|
|
- tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
|
|
tree scalar_type = TREE_TYPE (vector_type);
|
|
struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
|
|
gcc_assert (loop);
|
|
@@ -2453,241 +2811,55 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
|
|
dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
|
|
}
|
|
|
|
-/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
|
|
- operation. Return true if the results of DEF_STMT_INFO are something
|
|
- that can be accumulated by such a reduction. */
|
|
+/* Return true if we need an in-order reduction for operation CODE
|
|
+ on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
|
|
+ overflow must wrap. */
|
|
|
|
-static bool
|
|
-vect_valid_reduction_input_p (stmt_vec_info def_stmt_info)
|
|
+bool
|
|
+needs_fold_left_reduction_p (tree type, tree_code code)
|
|
{
|
|
- return (is_gimple_assign (def_stmt_info->stmt)
|
|
- || is_gimple_call (def_stmt_info->stmt)
|
|
- || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
|
|
- || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
|
|
- && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
|
|
- && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
|
|
-}
|
|
+ /* CHECKME: check for !flag_finite_math_only too? */
|
|
+ if (SCALAR_FLOAT_TYPE_P (type))
|
|
+ switch (code)
|
|
+ {
|
|
+ case MIN_EXPR:
|
|
+ case MAX_EXPR:
|
|
+ return false;
|
|
|
|
-/* Detect SLP reduction of the form:
|
|
+ default:
|
|
+ return !flag_associative_math;
|
|
+ }
|
|
|
|
- #a1 = phi <a5, a0>
|
|
- a2 = operation (a1)
|
|
- a3 = operation (a2)
|
|
- a4 = operation (a3)
|
|
- a5 = operation (a4)
|
|
+ if (INTEGRAL_TYPE_P (type))
|
|
+ {
|
|
+ if (!operation_no_trapping_overflow (type, code))
|
|
+ return true;
|
|
+ return false;
|
|
+ }
|
|
|
|
- #a = phi <a5>
|
|
+ if (SAT_FIXED_POINT_TYPE_P (type))
|
|
+ return true;
|
|
|
|
- PHI is the reduction phi node (#a1 = phi <a5, a0> above)
|
|
- FIRST_STMT is the first reduction stmt in the chain
|
|
- (a2 = operation (a1)).
|
|
+ return false;
|
|
+}
|
|
|
|
- Return TRUE if a reduction chain was detected. */
|
|
+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
|
|
+ has a handled computation expression. Store the main reduction
|
|
+ operation in *CODE. */
|
|
|
|
static bool
|
|
-vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
|
|
- gimple *first_stmt)
|
|
+check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
|
|
+ tree loop_arg, enum tree_code *code,
|
|
+ vec<std::pair<ssa_op_iter, use_operand_p> > &path)
|
|
{
|
|
- struct loop *loop = (gimple_bb (phi))->loop_father;
|
|
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
|
- enum tree_code code;
|
|
- gimple *loop_use_stmt = NULL;
|
|
- stmt_vec_info use_stmt_info;
|
|
- tree lhs;
|
|
- imm_use_iterator imm_iter;
|
|
- use_operand_p use_p;
|
|
- int nloop_uses, size = 0, n_out_of_loop_uses;
|
|
- bool found = false;
|
|
-
|
|
- if (loop != vect_loop)
|
|
- return false;
|
|
-
|
|
- auto_vec<stmt_vec_info, 8> reduc_chain;
|
|
- lhs = PHI_RESULT (phi);
|
|
- code = gimple_assign_rhs_code (first_stmt);
|
|
- while (1)
|
|
- {
|
|
- nloop_uses = 0;
|
|
- n_out_of_loop_uses = 0;
|
|
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
|
|
- {
|
|
- gimple *use_stmt = USE_STMT (use_p);
|
|
- if (is_gimple_debug (use_stmt))
|
|
- continue;
|
|
-
|
|
- /* Check if we got back to the reduction phi. */
|
|
- if (use_stmt == phi)
|
|
- {
|
|
- loop_use_stmt = use_stmt;
|
|
- found = true;
|
|
- break;
|
|
- }
|
|
-
|
|
- if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
|
|
- {
|
|
- loop_use_stmt = use_stmt;
|
|
- nloop_uses++;
|
|
- }
|
|
- else
|
|
- n_out_of_loop_uses++;
|
|
-
|
|
- /* There are can be either a single use in the loop or two uses in
|
|
- phi nodes. */
|
|
- if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
|
|
- return false;
|
|
- }
|
|
-
|
|
- if (found)
|
|
- break;
|
|
-
|
|
- /* We reached a statement with no loop uses. */
|
|
- if (nloop_uses == 0)
|
|
- return false;
|
|
-
|
|
- /* This is a loop exit phi, and we haven't reached the reduction phi. */
|
|
- if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
|
|
- return false;
|
|
-
|
|
- if (!is_gimple_assign (loop_use_stmt)
|
|
- || code != gimple_assign_rhs_code (loop_use_stmt)
|
|
- || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
|
|
- return false;
|
|
-
|
|
- /* Insert USE_STMT into reduction chain. */
|
|
- use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
|
|
- reduc_chain.safe_push (use_stmt_info);
|
|
-
|
|
- lhs = gimple_assign_lhs (loop_use_stmt);
|
|
- size++;
|
|
- }
|
|
-
|
|
- if (!found || loop_use_stmt != phi || size < 2)
|
|
- return false;
|
|
-
|
|
- /* Swap the operands, if needed, to make the reduction operand be the second
|
|
- operand. */
|
|
- lhs = PHI_RESULT (phi);
|
|
- for (unsigned i = 0; i < reduc_chain.length (); ++i)
|
|
- {
|
|
- gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
|
|
- if (gimple_assign_rhs2 (next_stmt) == lhs)
|
|
- {
|
|
- tree op = gimple_assign_rhs1 (next_stmt);
|
|
- stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
|
|
-
|
|
- /* Check that the other def is either defined in the loop
|
|
- ("vect_internal_def"), or it's an induction (defined by a
|
|
- loop-header phi-node). */
|
|
- if (def_stmt_info
|
|
- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
|
|
- && vect_valid_reduction_input_p (def_stmt_info))
|
|
- {
|
|
- lhs = gimple_assign_lhs (next_stmt);
|
|
- continue;
|
|
- }
|
|
-
|
|
- return false;
|
|
- }
|
|
- else
|
|
- {
|
|
- tree op = gimple_assign_rhs2 (next_stmt);
|
|
- stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
|
|
-
|
|
- /* Check that the other def is either defined in the loop
|
|
- ("vect_internal_def"), or it's an induction (defined by a
|
|
- loop-header phi-node). */
|
|
- if (def_stmt_info
|
|
- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
|
|
- && vect_valid_reduction_input_p (def_stmt_info))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
|
|
- next_stmt);
|
|
-
|
|
- swap_ssa_operands (next_stmt,
|
|
- gimple_assign_rhs1_ptr (next_stmt),
|
|
- gimple_assign_rhs2_ptr (next_stmt));
|
|
- update_stmt (next_stmt);
|
|
-
|
|
- if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
|
|
- LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
|
|
- }
|
|
- else
|
|
- return false;
|
|
- }
|
|
-
|
|
- lhs = gimple_assign_lhs (next_stmt);
|
|
- }
|
|
-
|
|
- /* Build up the actual chain. */
|
|
- for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
|
|
- {
|
|
- REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
|
|
- REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
|
|
- }
|
|
- REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
|
|
- REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
|
|
-
|
|
- /* Save the chain for further analysis in SLP detection. */
|
|
- LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
|
|
- REDUC_GROUP_SIZE (reduc_chain[0]) = size;
|
|
-
|
|
- return true;
|
|
-}
|
|
-
|
|
-/* Return true if we need an in-order reduction for operation CODE
|
|
- on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
|
|
- overflow must wrap. */
|
|
-
|
|
-static bool
|
|
-needs_fold_left_reduction_p (tree type, tree_code code,
|
|
- bool need_wrapping_integral_overflow)
|
|
-{
|
|
- /* CHECKME: check for !flag_finite_math_only too? */
|
|
- if (SCALAR_FLOAT_TYPE_P (type))
|
|
- switch (code)
|
|
- {
|
|
- case MIN_EXPR:
|
|
- case MAX_EXPR:
|
|
- return false;
|
|
-
|
|
- default:
|
|
- return !flag_associative_math;
|
|
- }
|
|
-
|
|
- if (INTEGRAL_TYPE_P (type))
|
|
- {
|
|
- if (!operation_no_trapping_overflow (type, code))
|
|
- return true;
|
|
- if (need_wrapping_integral_overflow
|
|
- && !TYPE_OVERFLOW_WRAPS (type)
|
|
- && operation_can_overflow (code))
|
|
- return true;
|
|
- return false;
|
|
- }
|
|
-
|
|
- if (SAT_FIXED_POINT_TYPE_P (type))
|
|
- return true;
|
|
-
|
|
- return false;
|
|
-}
|
|
-
|
|
-/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
|
|
- reduction operation CODE has a handled computation expression. */
|
|
-
|
|
-bool
|
|
-check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
|
|
- tree loop_arg, enum tree_code code)
|
|
-{
|
|
- auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
|
|
- auto_bitmap visited;
|
|
- tree lookfor = PHI_RESULT (phi);
|
|
- ssa_op_iter curri;
|
|
- use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
|
|
- while (USE_FROM_PTR (curr) != loop_arg)
|
|
- curr = op_iter_next_use (&curri);
|
|
- curri.i = curri.numops;
|
|
- do
|
|
+ auto_bitmap visited;
|
|
+ tree lookfor = PHI_RESULT (phi);
|
|
+ ssa_op_iter curri;
|
|
+ use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
|
|
+ while (USE_FROM_PTR (curr) != loop_arg)
|
|
+ curr = op_iter_next_use (&curri);
|
|
+ curri.i = curri.numops;
|
|
+ do
|
|
{
|
|
path.safe_push (std::make_pair (curri, curr));
|
|
tree use = USE_FROM_PTR (curr);
|
|
@@ -2747,36 +2919,71 @@ pop:
|
|
/* Check whether the reduction path detected is valid. */
|
|
bool fail = path.length () == 0;
|
|
bool neg = false;
|
|
+ *code = ERROR_MARK;
|
|
for (unsigned i = 1; i < path.length (); ++i)
|
|
{
|
|
gimple *use_stmt = USE_STMT (path[i].second);
|
|
tree op = USE_FROM_PTR (path[i].second);
|
|
- if (! has_single_use (op)
|
|
- || ! is_gimple_assign (use_stmt))
|
|
+ if (! is_gimple_assign (use_stmt)
|
|
+ /* The following make sure we can compute the operand index
|
|
+ easily plus it mostly disallows chaining via COND_EXPR condition
|
|
+ operands. */
|
|
+ || (gimple_assign_rhs1 (use_stmt) != op
|
|
+ && gimple_assign_rhs2 (use_stmt) != op
|
|
+ && gimple_assign_rhs3 (use_stmt) != op))
|
|
{
|
|
fail = true;
|
|
break;
|
|
}
|
|
- if (gimple_assign_rhs_code (use_stmt) != code)
|
|
+ /* Check there's only a single stmt the op is used on inside
|
|
+ of the loop. */
|
|
+ imm_use_iterator imm_iter;
|
|
+ gimple *op_use_stmt;
|
|
+ unsigned cnt = 0;
|
|
+ FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
|
|
+ if (!is_gimple_debug (op_use_stmt)
|
|
+ && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
|
|
+ cnt++;
|
|
+ if (cnt != 1)
|
|
{
|
|
- if (code == PLUS_EXPR
|
|
- && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
|
|
- {
|
|
- /* Track whether we negate the reduction value each iteration. */
|
|
- if (gimple_assign_rhs2 (use_stmt) == op)
|
|
- neg = ! neg;
|
|
- }
|
|
- else
|
|
- {
|
|
- fail = true;
|
|
- break;
|
|
- }
|
|
+ fail = true;
|
|
+ break;
|
|
+ }
|
|
+ tree_code use_code = gimple_assign_rhs_code (use_stmt);
|
|
+ if (use_code == MINUS_EXPR)
|
|
+ {
|
|
+ use_code = PLUS_EXPR;
|
|
+ /* Track whether we negate the reduction value each iteration. */
|
|
+ if (gimple_assign_rhs2 (use_stmt) == op)
|
|
+ neg = ! neg;
|
|
+ }
|
|
+ if (CONVERT_EXPR_CODE_P (use_code)
|
|
+ && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)),
|
|
+ TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
|
|
+ ;
|
|
+ else if (*code == ERROR_MARK)
|
|
+ *code = use_code;
|
|
+ else if (use_code != *code)
|
|
+ {
|
|
+ fail = true;
|
|
+ break;
|
|
}
|
|
}
|
|
- return ! fail && ! neg;
|
|
+ return ! fail && ! neg && *code != ERROR_MARK;
|
|
+}
|
|
+
|
|
+bool
|
|
+check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
|
|
+ tree loop_arg, enum tree_code code)
|
|
+{
|
|
+ auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
|
|
+ enum tree_code code_;
|
|
+ return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
|
|
+ && code_ == code);
|
|
}
|
|
|
|
|
|
+
|
|
/* Function vect_is_simple_reduction
|
|
|
|
(1) Detect a cross-iteration def-use cycle that represents a simple
|
|
@@ -2823,25 +3030,15 @@ pop:
|
|
|
|
static stmt_vec_info
|
|
vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
- bool *double_reduc,
|
|
- bool need_wrapping_integral_overflow,
|
|
- enum vect_reduction_type *v_reduc_type)
|
|
+ bool *double_reduc)
|
|
{
|
|
gphi *phi = as_a <gphi *> (phi_info->stmt);
|
|
- struct loop *loop = (gimple_bb (phi))->loop_father;
|
|
- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
|
- bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
|
|
gimple *phi_use_stmt = NULL;
|
|
- enum tree_code orig_code, code;
|
|
- tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
|
|
- tree type;
|
|
- tree name;
|
|
imm_use_iterator imm_iter;
|
|
use_operand_p use_p;
|
|
- bool phi_def;
|
|
|
|
*double_reduc = false;
|
|
- *v_reduc_type = TREE_CODE_REDUCTION;
|
|
+ STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
|
|
|
|
tree phi_name = PHI_RESULT (phi);
|
|
/* ??? If there are no uses of the PHI result the inner loop reduction
|
|
@@ -2850,6 +3047,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
can be constant. See PR60382. */
|
|
if (has_zero_uses (phi_name))
|
|
return NULL;
|
|
+ class loop *loop = (gimple_bb (phi))->loop_father;
|
|
unsigned nphi_def_loop_uses = 0;
|
|
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
|
|
{
|
|
@@ -2870,44 +3068,26 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
phi_use_stmt = use_stmt;
|
|
}
|
|
|
|
- edge latch_e = loop_latch_edge (loop);
|
|
- tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
|
|
- if (TREE_CODE (loop_arg) != SSA_NAME)
|
|
+ tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
|
|
+ if (TREE_CODE (latch_def) != SSA_NAME)
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "reduction: not ssa_name: %T\n", loop_arg);
|
|
+ "reduction: not ssa_name: %T\n", latch_def);
|
|
return NULL;
|
|
}
|
|
|
|
- stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
|
|
+ stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
|
|
if (!def_stmt_info
|
|
|| !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
|
|
return NULL;
|
|
|
|
- if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
|
|
- {
|
|
- name = gimple_assign_lhs (def_stmt);
|
|
- phi_def = false;
|
|
- }
|
|
- else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
|
|
- {
|
|
- name = PHI_RESULT (def_stmt);
|
|
- phi_def = true;
|
|
- }
|
|
- else
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "reduction: unhandled reduction operation: %G",
|
|
- def_stmt_info->stmt);
|
|
- return NULL;
|
|
- }
|
|
-
|
|
+ bool nested_in_vect_loop
|
|
+ = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
|
|
unsigned nlatch_def_loop_uses = 0;
|
|
auto_vec<gphi *, 3> lcphis;
|
|
bool inner_loop_of_double_reduc = false;
|
|
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
|
|
{
|
|
gimple *use_stmt = USE_STMT (use_p);
|
|
if (is_gimple_debug (use_stmt))
|
|
@@ -2925,11 +3105,21 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
}
|
|
}
|
|
|
|
+ /* If we are vectorizing an inner reduction we are executing that
|
|
+ in the original order only in case we are not dealing with a
|
|
+ double reduction. */
|
|
+ if (nested_in_vect_loop && !inner_loop_of_double_reduc)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ report_vect_op (MSG_NOTE, def_stmt_info->stmt,
|
|
+ "detected nested cycle: ");
|
|
+ return def_stmt_info;
|
|
+ }
|
|
+
|
|
/* If this isn't a nested cycle or if the nested cycle reduction value
|
|
is used ouside of the inner loop we cannot handle uses of the reduction
|
|
value. */
|
|
- if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
|
|
- && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
|
|
+ if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
@@ -2939,11 +3129,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
|
|
/* If DEF_STMT is a phi node itself, we expect it to have a single argument
|
|
defined in the inner loop. */
|
|
- if (phi_def)
|
|
+ if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
|
|
{
|
|
- gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
|
|
- op1 = PHI_ARG_DEF (def_stmt, 0);
|
|
-
|
|
+ tree op1 = PHI_ARG_DEF (def_stmt, 0);
|
|
if (gimple_phi_num_args (def_stmt) != 1
|
|
|| TREE_CODE (op1) != SSA_NAME)
|
|
{
|
|
@@ -2974,290 +3162,74 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
return NULL;
|
|
}
|
|
|
|
- /* If we are vectorizing an inner reduction we are executing that
|
|
- in the original order only in case we are not dealing with a
|
|
- double reduction. */
|
|
- bool check_reduction = true;
|
|
- if (flow_loop_nested_p (vect_loop, loop))
|
|
- {
|
|
- gphi *lcphi;
|
|
- unsigned i;
|
|
- check_reduction = false;
|
|
- FOR_EACH_VEC_ELT (lcphis, i, lcphi)
|
|
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
|
|
- {
|
|
- gimple *use_stmt = USE_STMT (use_p);
|
|
- if (is_gimple_debug (use_stmt))
|
|
- continue;
|
|
- if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
|
|
- check_reduction = true;
|
|
- }
|
|
- }
|
|
-
|
|
- gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
|
|
- code = orig_code = gimple_assign_rhs_code (def_stmt);
|
|
-
|
|
- if (nested_in_vect_loop && !check_reduction)
|
|
- {
|
|
- /* FIXME: Even for non-reductions code generation is funneled
|
|
- through vectorizable_reduction for the stmt defining the
|
|
- PHI latch value. So we have to artificially restrict ourselves
|
|
- for the supported operations. */
|
|
- switch (get_gimple_rhs_class (code))
|
|
- {
|
|
- case GIMPLE_BINARY_RHS:
|
|
- case GIMPLE_TERNARY_RHS:
|
|
- break;
|
|
- default:
|
|
- /* Not supported by vectorizable_reduction. */
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "nested cycle: not handled operation: ");
|
|
- return NULL;
|
|
- }
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
|
|
- return def_stmt_info;
|
|
- }
|
|
-
|
|
- /* We can handle "res -= x[i]", which is non-associative by
|
|
- simply rewriting this into "res += -x[i]". Avoid changing
|
|
- gimple instruction for the first simple tests and only do this
|
|
- if we're allowed to change code at all. */
|
|
- if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
|
|
- code = PLUS_EXPR;
|
|
-
|
|
- if (code == COND_EXPR)
|
|
+ /* Look for the expression computing latch_def from then loop PHI result. */
|
|
+ auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
|
|
+ enum tree_code code;
|
|
+ if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
|
|
+ path))
|
|
{
|
|
- if (! nested_in_vect_loop)
|
|
- *v_reduc_type = COND_REDUCTION;
|
|
+ STMT_VINFO_REDUC_CODE (phi_info) = code;
|
|
+ if (code == COND_EXPR && !nested_in_vect_loop)
|
|
+ STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
|
|
|
|
- op3 = gimple_assign_rhs1 (def_stmt);
|
|
- if (COMPARISON_CLASS_P (op3))
|
|
- {
|
|
- op4 = TREE_OPERAND (op3, 1);
|
|
- op3 = TREE_OPERAND (op3, 0);
|
|
- }
|
|
- if (op3 == phi_name || op4 == phi_name)
|
|
+ /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
|
|
+ reduction chain for which the additional restriction is that
|
|
+ all operations in the chain are the same. */
|
|
+ auto_vec<stmt_vec_info, 8> reduc_chain;
|
|
+ unsigned i;
|
|
+ bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
|
|
+ for (i = path.length () - 1; i >= 1; --i)
|
|
{
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "reduction: condition depends on previous"
|
|
- " iteration: ");
|
|
- return NULL;
|
|
+ gimple *stmt = USE_STMT (path[i].second);
|
|
+ stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
|
|
+ STMT_VINFO_REDUC_IDX (stmt_info)
|
|
+ = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
|
|
+ enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
|
|
+ bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
|
|
+ && (i == 1 || i == path.length () - 1));
|
|
+ if ((stmt_code != code && !leading_conversion)
|
|
+ /* We can only handle the final value in epilogue
|
|
+ generation for reduction chains. */
|
|
+ || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
|
|
+ is_slp_reduc = false;
|
|
+ /* For reduction chains we support a trailing/leading
|
|
+ conversions. We do not store those in the actual chain. */
|
|
+ if (leading_conversion)
|
|
+ continue;
|
|
+ reduc_chain.safe_push (stmt_info);
|
|
}
|
|
-
|
|
- op1 = gimple_assign_rhs2 (def_stmt);
|
|
- op2 = gimple_assign_rhs3 (def_stmt);
|
|
- }
|
|
- else if (!commutative_tree_code (code) || !associative_tree_code (code))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "reduction: not commutative/associative: ");
|
|
- return NULL;
|
|
- }
|
|
- else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
|
|
- {
|
|
- op1 = gimple_assign_rhs1 (def_stmt);
|
|
- op2 = gimple_assign_rhs2 (def_stmt);
|
|
- }
|
|
- else
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "reduction: not handled operation: ");
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "reduction: both uses not ssa_names: ");
|
|
-
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- type = TREE_TYPE (gimple_assign_lhs (def_stmt));
|
|
- if ((TREE_CODE (op1) == SSA_NAME
|
|
- && !types_compatible_p (type,TREE_TYPE (op1)))
|
|
- || (TREE_CODE (op2) == SSA_NAME
|
|
- && !types_compatible_p (type, TREE_TYPE (op2)))
|
|
- || (op3 && TREE_CODE (op3) == SSA_NAME
|
|
- && !types_compatible_p (type, TREE_TYPE (op3)))
|
|
- || (op4 && TREE_CODE (op4) == SSA_NAME
|
|
- && !types_compatible_p (type, TREE_TYPE (op4))))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- {
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "reduction: multiple types: operation type: "
|
|
- "%T, operands types: %T,%T",
|
|
- type, TREE_TYPE (op1), TREE_TYPE (op2));
|
|
- if (op3)
|
|
- dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
|
|
-
|
|
- if (op4)
|
|
- dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
|
|
- dump_printf (MSG_NOTE, "\n");
|
|
- }
|
|
-
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- /* Check whether it's ok to change the order of the computation.
|
|
- Generally, when vectorizing a reduction we change the order of the
|
|
- computation. This may change the behavior of the program in some
|
|
- cases, so we need to check that this is ok. One exception is when
|
|
- vectorizing an outer-loop: the inner-loop is executed sequentially,
|
|
- and therefore vectorizing reductions in the inner-loop during
|
|
- outer-loop vectorization is safe. */
|
|
- if (check_reduction
|
|
- && *v_reduc_type == TREE_CODE_REDUCTION
|
|
- && needs_fold_left_reduction_p (type, code,
|
|
- need_wrapping_integral_overflow))
|
|
- *v_reduc_type = FOLD_LEFT_REDUCTION;
|
|
-
|
|
- /* Reduction is safe. We're dealing with one of the following:
|
|
- 1) integer arithmetic and no trapv
|
|
- 2) floating point arithmetic, and special flags permit this optimization
|
|
- 3) nested cycle (i.e., outer loop vectorization). */
|
|
- stmt_vec_info def1_info = loop_info->lookup_def (op1);
|
|
- stmt_vec_info def2_info = loop_info->lookup_def (op2);
|
|
- if (code != COND_EXPR && !def1_info && !def2_info)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- /* Check that one def is the reduction def, defined by PHI,
|
|
- the other def is either defined in the loop ("vect_internal_def"),
|
|
- or it's an induction (defined by a loop-header phi-node). */
|
|
-
|
|
- if (def2_info
|
|
- && def2_info->stmt == phi
|
|
- && (code == COND_EXPR
|
|
- || !def1_info
|
|
- || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
|
|
- || vect_valid_reduction_input_p (def1_info)))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
|
|
- return def_stmt_info;
|
|
- }
|
|
-
|
|
- if (def1_info
|
|
- && def1_info->stmt == phi
|
|
- && (code == COND_EXPR
|
|
- || !def2_info
|
|
- || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
|
|
- || vect_valid_reduction_input_p (def2_info)))
|
|
- {
|
|
- if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
|
|
+ if (is_slp_reduc && reduc_chain.length () > 1)
|
|
{
|
|
- /* Check if we can swap operands (just for simplicity - so that
|
|
- the rest of the code can assume that the reduction variable
|
|
- is always the last (second) argument). */
|
|
- if (code == COND_EXPR)
|
|
+ for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
|
|
{
|
|
- /* Swap cond_expr by inverting the condition. */
|
|
- tree cond_expr = gimple_assign_rhs1 (def_stmt);
|
|
- enum tree_code invert_code = ERROR_MARK;
|
|
- enum tree_code cond_code = TREE_CODE (cond_expr);
|
|
-
|
|
- if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
|
|
- {
|
|
- bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
|
|
- invert_code = invert_tree_comparison (cond_code, honor_nans);
|
|
- }
|
|
- if (invert_code != ERROR_MARK)
|
|
- {
|
|
- TREE_SET_CODE (cond_expr, invert_code);
|
|
- swap_ssa_operands (def_stmt,
|
|
- gimple_assign_rhs2_ptr (def_stmt),
|
|
- gimple_assign_rhs3_ptr (def_stmt));
|
|
- }
|
|
- else
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt,
|
|
- "detected reduction: cannot swap operands "
|
|
- "for cond_expr");
|
|
- return NULL;
|
|
- }
|
|
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
|
|
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
|
|
}
|
|
- else
|
|
- swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
|
|
- gimple_assign_rhs2_ptr (def_stmt));
|
|
-
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt,
|
|
- "detected reduction: need to swap operands: ");
|
|
-
|
|
- if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
|
|
- LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
|
|
- }
|
|
- else
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
|
|
- }
|
|
+ REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
|
|
+ REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
|
|
|
|
- return def_stmt_info;
|
|
- }
|
|
+ /* Save the chain for further analysis in SLP detection. */
|
|
+ LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
|
|
+ REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
|
|
|
|
- /* Try to find SLP reduction chain. */
|
|
- if (! nested_in_vect_loop
|
|
- && code != COND_EXPR
|
|
- && orig_code != MINUS_EXPR
|
|
- && vect_is_slp_reduction (loop_info, phi, def_stmt))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- report_vect_op (MSG_NOTE, def_stmt,
|
|
- "reduction: detected reduction chain: ");
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "reduction: detected reduction chain\n");
|
|
+ }
|
|
+ else if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "reduction: detected reduction\n");
|
|
|
|
return def_stmt_info;
|
|
}
|
|
|
|
- /* Look for the expression computing loop_arg from loop PHI result. */
|
|
- if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
|
|
- return def_stmt_info;
|
|
-
|
|
if (dump_enabled_p ())
|
|
- {
|
|
- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
|
|
- "reduction: unknown pattern: ");
|
|
- }
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "reduction: unknown pattern\n");
|
|
|
|
return NULL;
|
|
}
|
|
|
|
-/* Wrapper around vect_is_simple_reduction, which will modify code
|
|
- in-place if it enables detection of more reductions. Arguments
|
|
- as there. */
|
|
-
|
|
-stmt_vec_info
|
|
-vect_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
|
|
- bool *double_reduc,
|
|
- bool need_wrapping_integral_overflow)
|
|
-{
|
|
- enum vect_reduction_type v_reduc_type;
|
|
- stmt_vec_info def_info
|
|
- = vect_is_simple_reduction (loop_info, phi_info, double_reduc,
|
|
- need_wrapping_integral_overflow,
|
|
- &v_reduc_type);
|
|
- if (def_info)
|
|
- {
|
|
- STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
|
|
- STMT_VINFO_REDUC_DEF (phi_info) = def_info;
|
|
- STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
|
|
- STMT_VINFO_REDUC_DEF (def_info) = phi_info;
|
|
- }
|
|
- return def_info;
|
|
-}
|
|
-
|
|
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
|
|
int
|
|
vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
|
|
@@ -3601,7 +3573,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
|
&vec_inside_cost, &vec_epilogue_cost);
|
|
|
|
vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
|
|
-
|
|
+
|
|
+ /* Stash the costs so that we can compare two loop_vec_infos. */
|
|
+ loop_vinfo->vec_inside_cost = vec_inside_cost;
|
|
+ loop_vinfo->vec_outside_cost = vec_outside_cost;
|
|
+
|
|
if (dump_enabled_p ())
|
|
{
|
|
dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
|
|
@@ -3846,6 +3822,7 @@ have_whole_vector_shift (machine_mode mode)
|
|
|
|
static void
|
|
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
|
+ vect_reduction_type reduction_type,
|
|
int ncopies, stmt_vector_for_cost *cost_vec)
|
|
{
|
|
int prologue_cost = 0, epilogue_cost = 0, inside_cost;
|
|
@@ -3860,8 +3837,6 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
|
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
|
|
/* Condition reductions generate two reductions in the loop. */
|
|
- vect_reduction_type reduction_type
|
|
- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
|
|
if (reduction_type == COND_REDUCTION)
|
|
ncopies *= 2;
|
|
|
|
@@ -4080,15 +4055,15 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
|
|
|
|
A cost model should help decide between these two schemes. */
|
|
|
|
-tree
|
|
-get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
|
|
+static tree
|
|
+get_initial_def_for_reduction (stmt_vec_info stmt_vinfo,
|
|
+ enum tree_code code, tree init_val,
|
|
tree *adjustment_def)
|
|
{
|
|
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
tree scalar_type = TREE_TYPE (init_val);
|
|
- tree vectype = get_vectype_for_scalar_type (scalar_type);
|
|
- enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
|
|
+ tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
|
|
tree def_for_init;
|
|
tree init_def;
|
|
REAL_VALUE_TYPE real_init_val = dconst0;
|
|
@@ -4103,8 +4078,10 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
|
|
gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo)
|
|
|| loop == (gimple_bb (stmt_vinfo->stmt))->loop_father);
|
|
|
|
- vect_reduction_type reduction_type
|
|
- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);
|
|
+ /* ADJUSTMENT_DEF is NULL when called from
|
|
+ vect_create_epilog_for_reduction to vectorize double reduction. */
|
|
+ if (adjustment_def)
|
|
+ *adjustment_def = NULL;
|
|
|
|
switch (code)
{
@@ -4118,11 +4095,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
case MULT_EXPR:
case BIT_AND_EXPR:
{
- /* ADJUSTMENT_DEF is NULL when called from
- vect_create_epilog_for_reduction to vectorize double reduction. */
- if (adjustment_def)
- *adjustment_def = init_val;
-
if (code == MULT_EXPR)
{
real_init_val = dconst1;
@@ -4137,10 +4109,14 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
else
def_for_init = build_int_cst (scalar_type, int_init_val);

- if (adjustment_def)
- /* Option1: the first element is '0' or '1' as well. */
- init_def = gimple_build_vector_from_val (&stmts, vectype,
- def_for_init);
+ if (adjustment_def || operand_equal_p (def_for_init, init_val, 0))
+ {
+ /* Option1: the first element is '0' or '1' as well. */
+ if (!operand_equal_p (def_for_init, init_val, 0))
+ *adjustment_def = init_val;
+ init_def = gimple_build_vector_from_val (&stmts, vectype,
+ def_for_init);
+ }
else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
{
/* Option2 (variable length): the first element is INIT_VAL. */
@@ -4164,16 +4140,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
case MAX_EXPR:
case COND_EXPR:
{
- if (adjustment_def)
- {
- *adjustment_def = NULL_TREE;
- if (reduction_type != COND_REDUCTION
- && reduction_type != EXTRACT_LAST_REDUCTION)
- {
- init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo);
- break;
- }
- }
init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
init_def = gimple_build_vector_from_val (&stmts, vectype, init_val);
}
@@ -4201,6 +4167,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
{
vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
stmt_vec_info stmt_vinfo = stmts[0];
+ vec_info *vinfo = stmt_vinfo->vinfo;
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
@@ -4293,7 +4260,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
{
/* First time round, duplicate ELTS to fill the
required number of vectors. */
- duplicate_and_interleave (&ctor_seq, vector_type, elts,
+ duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts,
number_of_vectors, *vec_oprnds);
break;
}
@@ -4309,42 +4276,47 @@ get_initial_defs_for_reduction (slp_tree slp_node,
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
}

+/* For a statement STMT_INFO taking part in a reduction operation return
+ the stmt_vec_info the meta information is stored on. */

-/* Function vect_create_epilog_for_reduction
-
- Create code at the loop-epilog to finalize the result of a reduction
+stmt_vec_info
+info_for_reduction (stmt_vec_info stmt_info)
+{
+ stmt_info = vect_orig_stmt (stmt_info);
+ gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
+ if (!is_a <gphi *> (stmt_info->stmt))
+ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
+ gphi *phi = as_a <gphi *> (stmt_info->stmt);
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+ {
+ if (gimple_phi_num_args (phi) == 1)
+ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
+ }
+ else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
+ {
+ edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father);
+ stmt_vec_info info
+ = stmt_info->vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe));
+ if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
+ stmt_info = info;
+ }
+ return stmt_info;
+}
+
|
|
+/* Function vect_create_epilog_for_reduction
|
|
+
|
|
+ Create code at the loop-epilog to finalize the result of a reduction
|
|
computation.
|
|
|
|
- VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
|
|
- reduction statements.
|
|
STMT_INFO is the scalar reduction stmt that is being vectorized.
|
|
- NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
|
|
- number of elements that we can fit in a vectype (nunits). In this case
|
|
- we have to generate more than one vector stmt - i.e - we need to "unroll"
|
|
- the vector stmt by a factor VF/nunits. For more details see documentation
|
|
- in vectorizable_operation.
|
|
- REDUC_FN is the internal function for the epilog reduction.
|
|
- REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
|
|
- computation.
|
|
- REDUC_INDEX is the index of the operand in the right hand side of the
|
|
- statement that is defined by REDUCTION_PHI.
|
|
- DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
|
|
SLP_NODE is an SLP node containing a group of reduction statements. The
|
|
first one in this group is STMT_INFO.
|
|
- INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case
|
|
- when the COND_EXPR is never true in the loop. For MAX_EXPR, it needs to
|
|
- be smaller than any value of the IV in the loop, for MIN_EXPR larger than
|
|
- any value of the IV in the loop.
|
|
- INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION.
|
|
- NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is
|
|
- null if this is not an SLP reduction
|
|
+ SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE
|
|
+ REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
|
|
+ (counting from 0)
|
|
|
|
This function:
|
|
- 1. Creates the reduction def-use cycles: sets the arguments for
|
|
- REDUCTION_PHIS:
|
|
- The loop-entry argument is the vectorized initial-value of the reduction.
|
|
- The loop-latch argument is taken from VECT_DEFS - the vector of partial
|
|
- sums.
|
|
+ 1. Completes the reduction def-use cycles.
|
|
2. "Reduces" each vector of partial results VECT_DEFS into a single result,
|
|
by calling the function specified by REDUC_FN if available, or by
|
|
other means (whole-vector shifts or a scalar loop).
|
|
@@ -4354,7 +4326,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
|
The flow at the entry to this function:
|
|
|
|
loop:
|
|
- vec_def = phi <null, null> # REDUCTION_PHI
|
|
+ vec_def = phi <vec_init, null> # REDUCTION_PHI
|
|
VECT_DEF = vector_stmt # vectorized form of STMT_INFO
|
|
s_loop = scalar_stmt # (scalar) STMT_INFO
|
|
loop_exit:
|
|
@@ -4379,21 +4351,34 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
|
*/
|
|
|
|
static void
|
|
-vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
- stmt_vec_info stmt_info,
|
|
- gimple *reduc_def_stmt,
|
|
- int ncopies, internal_fn reduc_fn,
|
|
- vec<stmt_vec_info> reduction_phis,
|
|
- bool double_reduc,
|
|
+vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
|
|
slp_tree slp_node,
|
|
- slp_instance slp_node_instance,
|
|
- tree induc_val, enum tree_code induc_code,
|
|
- tree neutral_op)
|
|
+ slp_instance slp_node_instance)
|
|
{
|
|
+ stmt_vec_info reduc_info = info_for_reduction (stmt_info);
|
|
+ gcc_assert (reduc_info->is_reduc_info);
|
|
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
+ /* For double reductions we need to get at the inner loop reduction
|
|
+ stmt which has the meta info attached. Our stmt_info is that of the
|
|
+ loop-closed PHI of the inner loop which we remember as
|
|
+ def for the reduction PHI generation. */
|
|
+ bool double_reduc = false;
|
|
+ stmt_vec_info rdef_info = stmt_info;
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
|
|
+ {
|
|
+ gcc_assert (!slp_node);
|
|
+ double_reduc = true;
|
|
+ stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
|
|
+ (stmt_info->stmt, 0));
|
|
+ stmt_info = vect_stmt_to_vectorize (stmt_info);
|
|
+ }
|
|
+ gphi *reduc_def_stmt
|
|
+ = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
|
|
+ enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
|
|
+ internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
|
|
stmt_vec_info prev_phi_info;
|
|
tree vectype;
|
|
machine_mode mode;
|
|
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
|
|
basic_block exit_bb;
|
|
tree scalar_dest;
|
|
@@ -4401,32 +4386,24 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
gimple *new_phi = NULL, *phi;
|
|
stmt_vec_info phi_info;
|
|
gimple_stmt_iterator exit_gsi;
|
|
- tree vec_dest;
|
|
- tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
|
|
+ tree new_temp = NULL_TREE, new_name, new_scalar_dest;
|
|
gimple *epilog_stmt = NULL;
|
|
- enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt);
|
|
gimple *exit_phi;
|
|
tree bitsize;
|
|
- tree adjustment_def = NULL;
|
|
- tree vec_initial_def = NULL;
|
|
- tree expr, def, initial_def = NULL;
|
|
+ tree def;
|
|
tree orig_name, scalar_result;
|
|
imm_use_iterator imm_iter, phi_imm_iter;
|
|
use_operand_p use_p, phi_use_p;
|
|
gimple *use_stmt;
|
|
- stmt_vec_info reduction_phi_info = NULL;
|
|
bool nested_in_vect_loop = false;
|
|
auto_vec<gimple *> new_phis;
|
|
- auto_vec<stmt_vec_info> inner_phis;
|
|
int j, i;
|
|
auto_vec<tree> scalar_results;
|
|
- unsigned int group_size = 1, k, ratio;
|
|
- auto_vec<tree> vec_initial_defs;
|
|
+ unsigned int group_size = 1, k;
|
|
auto_vec<gimple *> phis;
|
|
bool slp_reduc = false;
|
|
bool direct_slp_reduc;
|
|
tree new_phi_result;
|
|
- stmt_vec_info inner_phi = NULL;
|
|
tree induction_index = NULL_TREE;
|
|
|
|
if (slp_node)
|
|
@@ -4439,127 +4416,53 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
nested_in_vect_loop = true;
|
|
gcc_assert (!slp_node);
|
|
}
|
|
+ gcc_assert (!nested_in_vect_loop || double_reduc);
|
|
|
|
- vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
|
|
gcc_assert (vectype);
|
|
mode = TYPE_MODE (vectype);
|
|
|
|
- /* 1. Create the reduction def-use cycle:
|
|
- Set the arguments of REDUCTION_PHIS, i.e., transform
|
|
-
|
|
- loop:
|
|
- vec_def = phi <null, null> # REDUCTION_PHI
|
|
- VECT_DEF = vector_stmt # vectorized form of STMT
|
|
- ...
|
|
-
|
|
- into:
|
|
-
|
|
- loop:
|
|
- vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
|
|
- VECT_DEF = vector_stmt # vectorized form of STMT
|
|
- ...
|
|
-
|
|
- (in case of SLP, do it for all the phis). */
|
|
-
|
|
- /* Get the loop-entry arguments. */
|
|
- enum vect_def_type initial_def_dt = vect_unknown_def_type;
|
|
+ tree initial_def = NULL;
|
|
+ tree induc_val = NULL_TREE;
|
|
+ tree adjustment_def = NULL;
|
|
if (slp_node)
|
|
- {
|
|
- unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
|
- vec_initial_defs.reserve (vec_num);
|
|
- get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
|
|
- &vec_initial_defs, vec_num,
|
|
- REDUC_GROUP_FIRST_ELEMENT (stmt_info),
|
|
- neutral_op);
|
|
- }
|
|
+ ;
|
|
else
|
|
{
|
|
/* Get at the scalar def before the loop, that defines the initial value
|
|
of the reduction variable. */
|
|
initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
|
|
loop_preheader_edge (loop));
|
|
- /* Optimize: if initial_def is for REDUC_MAX smaller than the base
|
|
- and we can't use zero for induc_val, use initial_def. Similarly
|
|
- for REDUC_MIN and initial_def larger than the base. */
|
|
- if (TREE_CODE (initial_def) == INTEGER_CST
|
|
- && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == INTEGER_INDUC_COND_REDUCTION)
|
|
- && !integer_zerop (induc_val)
|
|
- && ((induc_code == MAX_EXPR
|
|
- && tree_int_cst_lt (initial_def, induc_val))
|
|
- || (induc_code == MIN_EXPR
|
|
- && tree_int_cst_lt (induc_val, initial_def))))
|
|
- induc_val = initial_def;
|
|
-
|
|
- if (double_reduc)
|
|
- /* In case of double reduction we only create a vector variable
|
|
- to be put in the reduction phi node. The actual statement
|
|
- creation is done later in this function. */
|
|
- vec_initial_def = vect_create_destination_var (initial_def, vectype);
|
|
+ /* Optimize: for induction condition reduction, if we can't use zero
|
|
+ for induc_val, use initial_def. */
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
|
|
+ induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
|
|
+ else if (double_reduc)
|
|
+ ;
|
|
else if (nested_in_vect_loop)
|
|
- {
|
|
- /* Do not use an adjustment def as that case is not supported
|
|
- correctly if ncopies is not one. */
|
|
- vect_is_simple_use (initial_def, loop_vinfo, &initial_def_dt);
|
|
- vec_initial_def = vect_get_vec_def_for_operand (initial_def,
|
|
- stmt_info);
|
|
- }
|
|
+ ;
|
|
else
|
|
- vec_initial_def
|
|
- = get_initial_def_for_reduction (stmt_info, initial_def,
|
|
- &adjustment_def);
|
|
- vec_initial_defs.create (1);
|
|
- vec_initial_defs.quick_push (vec_initial_def);
|
|
+ adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
|
|
}
|
|
|
|
- /* Set phi nodes arguments. */
|
|
- FOR_EACH_VEC_ELT (reduction_phis, i, phi_info)
|
|
+ unsigned vec_num;
|
|
+ int ncopies;
|
|
+ if (slp_node)
|
|
{
|
|
- tree vec_init_def = vec_initial_defs[i];
|
|
- tree def = vect_defs[i];
|
|
- for (j = 0; j < ncopies; j++)
|
|
- {
|
|
- if (j != 0)
|
|
- {
|
|
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
|
|
- if (nested_in_vect_loop)
|
|
- vec_init_def
|
|
- = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_init_def);
|
|
- }
|
|
-
|
|
- /* Set the loop-entry arg of the reduction-phi. */
|
|
-
|
|
- gphi *phi = as_a <gphi *> (phi_info->stmt);
|
|
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == INTEGER_INDUC_COND_REDUCTION)
|
|
- {
|
|
- /* Initialise the reduction phi to zero. This prevents initial
|
|
- values of non-zero interferring with the reduction op. */
|
|
- gcc_assert (ncopies == 1);
|
|
- gcc_assert (i == 0);
|
|
-
|
|
- tree vec_init_def_type = TREE_TYPE (vec_init_def);
|
|
- tree induc_val_vec
|
|
- = build_vector_from_val (vec_init_def_type, induc_val);
|
|
-
|
|
- add_phi_arg (phi, induc_val_vec, loop_preheader_edge (loop),
|
|
- UNKNOWN_LOCATION);
|
|
- }
|
|
- else
|
|
- add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
|
|
- UNKNOWN_LOCATION);
|
|
-
|
|
- /* Set the loop-latch arg for the reduction-phi. */
|
|
- if (j > 0)
|
|
- def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
|
|
-
|
|
- add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
|
|
-
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "transform reduction: created def-use cycle: %G%G",
|
|
- phi, SSA_NAME_DEF_STMT (def));
|
|
- }
|
|
+ vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
|
|
+ ncopies = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ vec_num = 1;
|
|
+ ncopies = 0;
|
|
+ phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt));
|
|
+ do
|
|
+ {
|
|
+ ncopies++;
|
|
+ phi_info = STMT_VINFO_RELATED_STMT (phi_info);
|
|
+ }
|
|
+ while (phi_info);
|
|
}
|
|
|
|
/* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
|
|
@@ -4569,7 +4472,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
The first match will be a 1 to allow 0 to be used for non-matching
|
|
indexes. If there are no matches at all then the vector will be all
|
|
zeroes. */
|
|
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
|
|
{
|
|
tree indx_before_incr, indx_after_incr;
|
|
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
|
|
@@ -4627,11 +4530,17 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
|
|
|
|
/* Create a conditional, where the condition is taken from vec_stmt
|
|
- (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
|
|
- else is the phi (NEW_PHI_TREE). */
|
|
- tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
|
|
- ccompare, indx_before_incr,
|
|
- new_phi_tree);
|
|
+ (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR:
|
|
+ the reduction phi corresponds to NEW_PHI_TREE and the new values
|
|
+ correspond to INDEX_BEFORE_INCR. */
|
|
+ gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1);
|
|
+ tree index_cond_expr;
|
|
+ if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
|
|
+ index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
|
|
+ ccompare, indx_before_incr, new_phi_tree);
|
|
+ else
|
|
+ index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
|
|
+ ccompare, new_phi_tree, indx_before_incr);
|
|
induction_index = make_ssa_name (cr_index_vector_type);
|
|
gimple *index_condition = gimple_build_assign (induction_index,
|
|
index_cond_expr);
|
|
@@ -4674,12 +4583,17 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
/* 2.1 Create new loop-exit-phis to preserve loop-closed form:
|
|
v_out1 = phi <VECT_DEF>
|
|
Store them in NEW_PHIS. */
|
|
-
|
|
+ if (double_reduc)
|
|
+ loop = outer_loop;
|
|
exit_bb = single_exit (loop)->dest;
|
|
prev_phi_info = NULL;
|
|
- new_phis.create (vect_defs.length ());
|
|
- FOR_EACH_VEC_ELT (vect_defs, i, def)
|
|
+ new_phis.create (slp_node ? vec_num : ncopies);
|
|
+ for (unsigned i = 0; i < vec_num; i++)
|
|
{
|
|
+ if (slp_node)
|
|
+ def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
|
|
+ else
|
|
+ def = gimple_get_lhs (STMT_VINFO_VEC_STMT (rdef_info)->stmt);
|
|
for (j = 0; j < ncopies; j++)
|
|
{
|
|
tree new_def = copy_ssa_name (def);
|
|
@@ -4698,37 +4612,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
}
|
|
}
|
|
|
|
- /* The epilogue is created for the outer-loop, i.e., for the loop being
|
|
- vectorized. Create exit phis for the outer loop. */
|
|
- if (double_reduc)
|
|
- {
|
|
- loop = outer_loop;
|
|
- exit_bb = single_exit (loop)->dest;
|
|
- inner_phis.create (vect_defs.length ());
|
|
- FOR_EACH_VEC_ELT (new_phis, i, phi)
|
|
- {
|
|
- stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
|
|
- tree new_result = copy_ssa_name (PHI_RESULT (phi));
|
|
- gphi *outer_phi = create_phi_node (new_result, exit_bb);
|
|
- SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
|
|
- PHI_RESULT (phi));
|
|
- prev_phi_info = loop_vinfo->add_stmt (outer_phi);
|
|
- inner_phis.quick_push (phi_info);
|
|
- new_phis[i] = outer_phi;
|
|
- while (STMT_VINFO_RELATED_STMT (phi_info))
|
|
- {
|
|
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
|
|
- new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt));
|
|
- outer_phi = create_phi_node (new_result, exit_bb);
|
|
- SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
|
|
- PHI_RESULT (phi_info->stmt));
|
|
- stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi);
|
|
- STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info;
|
|
- prev_phi_info = outer_phi_info;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
exit_gsi = gsi_after_labels (exit_bb);
|
|
|
|
/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
|
|
@@ -4747,12 +4630,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
|
|
gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
|
|
}
|
|
-
|
|
- code = gimple_assign_rhs_code (orig_stmt_info->stmt);
|
|
- /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
|
|
- partial results are added and not subtracted. */
|
|
- if (code == MINUS_EXPR)
|
|
- code = PLUS_EXPR;
|
|
|
|
scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt);
|
|
scalar_type = TREE_TYPE (scalar_dest);
|
|
@@ -4760,15 +4637,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
|
|
bitsize = TYPE_SIZE (scalar_type);
|
|
|
|
- /* In case this is a reduction in an inner-loop while vectorizing an outer
|
|
- loop - we don't need to extract a single scalar result at the end of the
|
|
- inner-loop (unless it is double reduction, i.e., the use of reduction is
|
|
- outside the outer-loop). The final vector of partial results will be used
|
|
- in the vectorized outer-loop, or reduced to a scalar result at the end of
|
|
- the outer-loop. */
|
|
- if (nested_in_vect_loop && !double_reduc)
|
|
- goto vect_finalize_reduction;
|
|
-
|
|
/* SLP reduction without reduction chain, e.g.,
|
|
# a1 = phi <a2, a0>
|
|
# b1 = phi <b2, b0>
|
|
@@ -4791,53 +4659,48 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
one vector. */
|
|
if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
|
|
{
|
|
+ gimple_seq stmts = NULL;
|
|
tree first_vect = PHI_RESULT (new_phis[0]);
|
|
- gassign *new_vec_stmt = NULL;
|
|
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
|
+ first_vect = gimple_convert (&stmts, vectype, first_vect);
|
|
for (k = 1; k < new_phis.length (); k++)
|
|
{
|
|
gimple *next_phi = new_phis[k];
|
|
tree second_vect = PHI_RESULT (next_phi);
|
|
- tree tem = make_ssa_name (vec_dest, new_vec_stmt);
|
|
- new_vec_stmt = gimple_build_assign (tem, code,
|
|
- first_vect, second_vect);
|
|
- gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
|
|
- first_vect = tem;
|
|
+ second_vect = gimple_convert (&stmts, vectype, second_vect);
|
|
+ first_vect = gimple_build (&stmts, code, vectype,
|
|
+ first_vect, second_vect);
|
|
}
|
|
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
|
|
|
new_phi_result = first_vect;
|
|
- if (new_vec_stmt)
|
|
- {
|
|
- new_phis.truncate (0);
|
|
- new_phis.safe_push (new_vec_stmt);
|
|
- }
|
|
+ new_phis.truncate (0);
|
|
+ new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
|
|
}
|
|
/* Likewise if we couldn't use a single defuse cycle. */
|
|
else if (ncopies > 1)
|
|
{
|
|
gcc_assert (new_phis.length () == 1);
|
|
+ gimple_seq stmts = NULL;
|
|
tree first_vect = PHI_RESULT (new_phis[0]);
|
|
- gassign *new_vec_stmt = NULL;
|
|
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
|
+ first_vect = gimple_convert (&stmts, vectype, first_vect);
|
|
stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
|
|
for (int k = 1; k < ncopies; ++k)
|
|
{
|
|
next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
|
|
tree second_vect = PHI_RESULT (next_phi_info->stmt);
|
|
- tree tem = make_ssa_name (vec_dest, new_vec_stmt);
|
|
- new_vec_stmt = gimple_build_assign (tem, code,
|
|
- first_vect, second_vect);
|
|
- gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
|
|
- first_vect = tem;
|
|
+ second_vect = gimple_convert (&stmts, vectype, second_vect);
|
|
+ first_vect = gimple_build (&stmts, code, vectype,
|
|
+ first_vect, second_vect);
|
|
}
|
|
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
|
new_phi_result = first_vect;
|
|
new_phis.truncate (0);
|
|
- new_phis.safe_push (new_vec_stmt);
|
|
+ new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
|
|
}
|
|
else
|
|
new_phi_result = PHI_RESULT (new_phis[0]);
|
|
|
|
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
|
|
&& reduc_fn != IFN_LAST)
|
|
{
|
|
/* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
|
|
@@ -4852,8 +4715,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
tree index_vec_type = TREE_TYPE (induction_index);
|
|
gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
|
|
tree index_scalar_type = TREE_TYPE (index_vec_type);
|
|
- tree index_vec_cmp_type = build_same_sized_truth_vector_type
|
|
- (index_vec_type);
|
|
+ tree index_vec_cmp_type = truth_type_for (index_vec_type);
|
|
|
|
/* Get an unsigned integer version of the type of the data vector. */
|
|
int scalar_precision
|
|
@@ -4946,7 +4808,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
|
scalar_results.safe_push (new_temp);
|
|
}
|
|
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
|
|
+ else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
|
|
&& reduc_fn == IFN_LAST)
|
|
{
|
|
/* Condition reduction without supported IFN_REDUC_MAX. Generate
|
|
@@ -4989,7 +4851,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
if (off != 0)
|
|
{
|
|
tree new_idx_val = idx_val;
|
|
- tree new_val = val;
|
|
if (off != v_size - el_size)
|
|
{
|
|
new_idx_val = make_ssa_name (idx_eltype);
|
|
@@ -4998,7 +4859,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
old_idx_val);
|
|
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
}
|
|
- new_val = make_ssa_name (data_eltype);
|
|
+ tree new_val = make_ssa_name (data_eltype);
|
|
epilog_stmt = gimple_build_assign (new_val,
|
|
COND_EXPR,
|
|
build2 (GT_EXPR,
|
|
@@ -5060,9 +4921,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
gimple_set_lhs (epilog_stmt, new_temp);
|
|
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
|
|
- if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == INTEGER_INDUC_COND_REDUCTION)
|
|
- && !operand_equal_p (initial_def, induc_val, 0))
|
|
+ if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
|
|
+ && induc_val)
|
|
{
|
|
/* Earlier we set the initial value to be a vector if induc_val
|
|
values. Check the result and if it is induc_val then replace
|
|
@@ -5100,7 +4960,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
tree index = build_index_vector (vectype, 0, 1);
|
|
tree index_type = TREE_TYPE (index);
|
|
tree index_elt_type = TREE_TYPE (index_type);
|
|
- tree mask_type = build_same_sized_truth_vector_type (index_type);
|
|
+ tree mask_type = truth_type_for (index_type);
|
|
|
|
/* Create a vector that, for each element, identifies which of
|
|
the REDUC_GROUP_SIZE results should use it. */
|
|
@@ -5112,6 +4972,14 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
scalar value if we have one, otherwise the initial scalar value
|
|
is itself a neutral value. */
|
|
tree vector_identity = NULL_TREE;
|
|
+ tree neutral_op = NULL_TREE;
|
|
+ if (slp_node)
|
|
+ {
|
|
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
|
|
+ neutral_op
|
|
+ = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
|
|
+ vectype, code, first != NULL);
|
|
+ }
|
|
if (neutral_op)
|
|
vector_identity = gimple_build_vector_from_val (&seq, vectype,
|
|
neutral_op);
|
|
@@ -5161,32 +5029,19 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
bool reduce_with_shift;
|
|
tree vec_temp;
|
|
|
|
- /* COND reductions all do the final reduction with MAX_EXPR
|
|
- or MIN_EXPR. */
|
|
- if (code == COND_EXPR)
|
|
- {
|
|
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == INTEGER_INDUC_COND_REDUCTION)
|
|
- code = induc_code;
|
|
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == CONST_COND_REDUCTION)
|
|
- code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
|
|
- else
|
|
- code = MAX_EXPR;
|
|
- }
|
|
-
|
|
/* See if the target wants to do the final (shift) reduction
|
|
in a vector mode of smaller size and first reduce upper/lower
|
|
halves against each other. */
|
|
enum machine_mode mode1 = mode;
|
|
- tree vectype1 = vectype;
|
|
- unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
|
|
- unsigned sz1 = sz;
|
|
+ tree stype = TREE_TYPE (vectype);
|
|
+ unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
|
|
+ unsigned nunits1 = nunits;
|
|
if (!slp_reduc
|
|
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
|
|
- sz1 = GET_MODE_SIZE (mode1).to_constant ();
|
|
+ nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
|
|
|
|
- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
|
|
+ tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
|
|
+ stype, nunits1);
|
|
reduce_with_shift = have_whole_vector_shift (mode1);
|
|
if (!VECTOR_MODE_P (mode1))
|
|
reduce_with_shift = false;
|
|
@@ -5200,11 +5055,13 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
/* First reduce the vector to the desired vector size we should
|
|
do shift reduction on by combining upper and lower halves. */
|
|
new_temp = new_phi_result;
|
|
- while (sz > sz1)
|
|
+ while (nunits > nunits1)
|
|
{
|
|
gcc_assert (!slp_reduc);
|
|
- sz /= 2;
|
|
- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
|
|
+ nunits /= 2;
|
|
+ vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
|
|
+ stype, nunits);
|
|
+ unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));
|
|
|
|
/* The target has to make sure we support lowpart/highpart
|
|
extraction, either via direct vector extract or through
|
|
@@ -5229,15 +5086,14 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
= gimple_build_assign (dst2, BIT_FIELD_REF,
|
|
build3 (BIT_FIELD_REF, vectype1,
|
|
new_temp, TYPE_SIZE (vectype1),
|
|
- bitsize_int (sz * BITS_PER_UNIT)));
|
|
+ bitsize_int (bitsize)));
|
|
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
}
|
|
else
|
|
{
|
|
/* Extract via punning to appropriately sized integer mode
|
|
vector. */
|
|
- tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
|
|
- 1);
|
|
+ tree eltype = build_nonstandard_integer_type (bitsize, 1);
|
|
tree etype = build_vector_type (eltype, 2);
|
|
gcc_assert (convert_optab_handler (vec_extract_optab,
|
|
TYPE_MODE (etype),
|
|
@@ -5266,7 +5122,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
= gimple_build_assign (tem, BIT_FIELD_REF,
|
|
build3 (BIT_FIELD_REF, eltype,
|
|
new_temp, TYPE_SIZE (eltype),
|
|
- bitsize_int (sz * BITS_PER_UNIT)));
|
|
+ bitsize_int (bitsize)));
|
|
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
dst2 = make_ssa_name (vectype1);
|
|
epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
|
|
@@ -5307,8 +5163,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Reduce using vector shifts\n");
|
|
|
|
- mode1 = TYPE_MODE (vectype1);
|
|
- vec_dest = vect_create_destination_var (scalar_dest, vectype1);
|
|
+ gimple_seq stmts = NULL;
|
|
+ new_temp = gimple_convert (&stmts, vectype1, new_temp);
|
|
for (elt_offset = nelements / 2;
|
|
elt_offset >= 1;
|
|
elt_offset /= 2)
|
|
@@ -5316,18 +5172,12 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
|
|
indices.new_vector (sel, 2, nelements);
|
|
tree mask = vect_gen_perm_mask_any (vectype1, indices);
|
|
- epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
|
|
- new_temp, zero_vec, mask);
|
|
- new_name = make_ssa_name (vec_dest, epilog_stmt);
|
|
- gimple_assign_set_lhs (epilog_stmt, new_name);
|
|
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
-
|
|
- epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
|
|
- new_temp);
|
|
- new_temp = make_ssa_name (vec_dest, epilog_stmt);
|
|
- gimple_assign_set_lhs (epilog_stmt, new_temp);
|
|
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
+ new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
|
|
+ new_temp, zero_vec, mask);
|
|
+ new_temp = gimple_build (&stmts, code,
|
|
+ vectype1, new_name, new_temp);
|
|
}
|
|
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
|
|
|
/* 2.4 Extract the final scalar result. Create:
|
|
s_out3 = extract_field <v_out2, bitpos> */
|
|
@@ -5439,9 +5289,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
scalar_results.safe_push (new_temp);
|
|
}
|
|
|
|
- if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == INTEGER_INDUC_COND_REDUCTION)
|
|
- && !operand_equal_p (initial_def, induc_val, 0))
|
|
+ if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
|
|
+ && induc_val)
|
|
{
|
|
/* Earlier we set the initial value to be a vector if induc_val
|
|
values. Check the result and if it is induc_val then replace
|
|
@@ -5457,12 +5306,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
|
|
scalar_results[0] = tmp;
|
|
}
|
|
}
|
|
-
|
|
-vect_finalize_reduction:
|
|
-
|
|
- if (double_reduc)
|
|
- loop = loop->inner;
|
|
-
|
|
+
|
|
/* 2.5 Adjust the final result by the initial value of the reduction
|
|
variable. (When such adjustment is not needed, then
|
|
'adjustment_def' is zero). For example, if code is PLUS we create:
|
|
@@ -5471,25 +5315,26 @@ vect_finalize_reduction:
|
|
if (adjustment_def)
|
|
{
|
|
gcc_assert (!slp_reduc);
|
|
+ gimple_seq stmts = NULL;
|
|
if (nested_in_vect_loop)
|
|
{
|
|
new_phi = new_phis[0];
|
|
- gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
|
|
- expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
|
|
- new_dest = vect_create_destination_var (scalar_dest, vectype);
|
|
+ gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def)));
|
|
+ adjustment_def = gimple_convert (&stmts, vectype, adjustment_def);
|
|
+ new_temp = gimple_build (&stmts, code, vectype,
|
|
+ PHI_RESULT (new_phi), adjustment_def);
|
|
}
|
|
else
|
|
{
|
|
new_temp = scalar_results[0];
|
|
gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
|
|
- expr = build2 (code, scalar_type, new_temp, adjustment_def);
|
|
- new_dest = vect_create_destination_var (scalar_dest, scalar_type);
|
|
+ adjustment_def = gimple_convert (&stmts, scalar_type, adjustment_def);
|
|
+ new_temp = gimple_build (&stmts, code, scalar_type,
|
|
+ new_temp, adjustment_def);
|
|
}
|
|
|
|
- epilog_stmt = gimple_build_assign (new_dest, expr);
|
|
- new_temp = make_ssa_name (new_dest, epilog_stmt);
|
|
- gimple_assign_set_lhs (epilog_stmt, new_temp);
|
|
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
|
+ epilog_stmt = gimple_seq_last_stmt (stmts);
|
|
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
|
|
if (nested_in_vect_loop)
|
|
{
|
|
stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt);
|
|
@@ -5507,6 +5352,9 @@ vect_finalize_reduction:
|
|
new_phis[0] = epilog_stmt;
|
|
}
|
|
|
|
+ if (double_reduc)
|
|
+ loop = loop->inner;
|
|
+
|
|
/* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
|
|
phis with new adjusted scalar results, i.e., replace use <s_out0>
|
|
with use <s_out4>.
|
|
@@ -5552,24 +5400,10 @@ vect_finalize_reduction:
|
|
correspond to the first vector stmt, etc.
|
|
(RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */
|
|
if (group_size > new_phis.length ())
|
|
- {
|
|
- ratio = group_size / new_phis.length ();
|
|
- gcc_assert (!(group_size % new_phis.length ()));
|
|
- }
|
|
- else
|
|
- ratio = 1;
|
|
+ gcc_assert (!(group_size % new_phis.length ()));
|
|
|
|
- stmt_vec_info epilog_stmt_info = NULL;
|
|
for (k = 0; k < group_size; k++)
|
|
{
|
|
- if (k % ratio == 0)
|
|
- {
|
|
- epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]);
|
|
- reduction_phi_info = reduction_phis[k / ratio];
|
|
- if (double_reduc)
|
|
- inner_phi = inner_phis[k / ratio];
|
|
- }
|
|
-
|
|
if (slp_reduc)
|
|
{
|
|
stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
|
|
@@ -5580,121 +5414,12 @@ vect_finalize_reduction:
|
|
scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
|
|
}
|
|
|
|
- phis.create (3);
|
|
- /* Find the loop-closed-use at the loop exit of the original scalar
|
|
- result. (The reduction result is expected to have two immediate uses -
|
|
- one at the latch block, and one at the loop exit). */
|
|
- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
|
|
- if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
|
|
- && !is_gimple_debug (USE_STMT (use_p)))
|
|
- phis.safe_push (USE_STMT (use_p));
|
|
-
|
|
- /* While we expect to have found an exit_phi because of loop-closed-ssa
|
|
- form we can end up without one if the scalar cycle is dead. */
|
|
-
|
|
- FOR_EACH_VEC_ELT (phis, i, exit_phi)
|
|
- {
|
|
- if (outer_loop)
|
|
- {
|
|
- stmt_vec_info exit_phi_vinfo
|
|
- = loop_vinfo->lookup_stmt (exit_phi);
|
|
- gphi *vect_phi;
|
|
-
|
|
- if (double_reduc)
|
|
- STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
|
|
- else
|
|
- STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info;
|
|
- if (!double_reduc
|
|
- || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
|
|
- != vect_double_reduction_def)
|
|
- continue;
|
|
-
|
|
- /* Handle double reduction:
|
|
-
|
|
- stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
|
|
- stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
|
|
- stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
|
|
- stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
|
|
-
|
|
- At that point the regular reduction (stmt2 and stmt3) is
|
|
- already vectorized, as well as the exit phi node, stmt4.
|
|
- Here we vectorize the phi node of double reduction, stmt1, and
|
|
- update all relevant statements. */
|
|
-
|
|
- /* Go through all the uses of s2 to find double reduction phi
|
|
- node, i.e., stmt1 above. */
|
|
- orig_name = PHI_RESULT (exit_phi);
|
|
- FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
|
|
- {
|
|
- stmt_vec_info use_stmt_vinfo;
|
|
- tree vect_phi_init, preheader_arg, vect_phi_res;
|
|
- basic_block bb = gimple_bb (use_stmt);
|
|
-
|
|
- /* Check that USE_STMT is really double reduction phi
|
|
- node. */
|
|
- if (gimple_code (use_stmt) != GIMPLE_PHI
|
|
- || gimple_phi_num_args (use_stmt) != 2
|
|
- || bb->loop_father != outer_loop)
|
|
- continue;
|
|
- use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt);
|
|
- if (!use_stmt_vinfo
|
|
- || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
|
|
- != vect_double_reduction_def)
|
|
- continue;
|
|
-
|
|
- /* Create vector phi node for double reduction:
|
|
- vs1 = phi <vs0, vs2>
|
|
- vs1 was created previously in this function by a call to
|
|
- vect_get_vec_def_for_operand and is stored in
|
|
- vec_initial_def;
|
|
- vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
|
|
- vs0 is created here. */
|
|
-
|
|
- /* Create vector phi node. */
|
|
- vect_phi = create_phi_node (vec_initial_def, bb);
|
|
- loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi);
|
|
-
|
|
- /* Create vs0 - initial def of the double reduction phi. */
|
|
- preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
|
|
- loop_preheader_edge (outer_loop));
|
|
- vect_phi_init = get_initial_def_for_reduction
|
|
- (stmt_info, preheader_arg, NULL);
|
|
-
|
|
- /* Update phi node arguments with vs0 and vs2. */
|
|
- add_phi_arg (vect_phi, vect_phi_init,
|
|
- loop_preheader_edge (outer_loop),
|
|
- UNKNOWN_LOCATION);
|
|
- add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt),
|
|
- loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "created double reduction phi node: %G",
|
|
- vect_phi);
|
|
-
|
|
- vect_phi_res = PHI_RESULT (vect_phi);
|
|
-
|
|
- /* Replace the use, i.e., set the correct vs1 in the regular
|
|
- reduction phi node. FORNOW, NCOPIES is always 1, so the
|
|
- loop is redundant. */
|
|
- stmt_vec_info use_info = reduction_phi_info;
|
|
- for (j = 0; j < ncopies; j++)
|
|
- {
|
|
- edge pr_edge = loop_preheader_edge (loop);
|
|
- SET_PHI_ARG_DEF (as_a <gphi *> (use_info->stmt),
|
|
- pr_edge->dest_idx, vect_phi_res);
|
|
- use_info = STMT_VINFO_RELATED_STMT (use_info);
|
|
- }
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- phis.release ();
|
|
if (nested_in_vect_loop)
|
|
{
|
|
if (double_reduc)
|
|
loop = outer_loop;
|
|
else
|
|
- continue;
|
|
+ gcc_unreachable ();
|
|
}
|
|
|
|
phis.create (3);
|
|
@@ -5824,9 +5549,6 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
|
|
gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
|
|
gcc_assert (ncopies == 1);
|
|
gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
|
|
- gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
|
|
- gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- == FOLD_LEFT_REDUCTION);
|
|
|
|
if (slp_node)
|
|
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
|
|
@@ -5840,10 +5562,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
|
|
if (slp_node)
|
|
{
|
|
auto_vec<vec<tree> > vec_defs (2);
|
|
- auto_vec<tree> sops(2);
|
|
- sops.quick_push (ops[0]);
|
|
- sops.quick_push (ops[1]);
|
|
- vect_get_slp_defs (sops, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
|
|
vec_defs[0].release ();
|
|
vec_defs[1].release ();
|
|
@@ -5984,6 +5703,55 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
|
|
<= TYPE_PRECISION (lhs_type));
|
|
}
|
|
|
|
+/* Check if masking can be supported by inserting a conditional expression.
|
|
+ CODE is the code for the operation. COND_FN is the conditional internal
|
|
+ function, if it exists. VECTYPE_IN is the type of the vector input. */
|
|
+static bool
|
|
+use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn,
|
|
+ tree vectype_in)
|
|
+{
|
|
+ if (cond_fn != IFN_LAST
|
|
+ && direct_internal_fn_supported_p (cond_fn, vectype_in,
|
|
+ OPTIMIZE_FOR_SPEED))
|
|
+ return false;
|
|
+
|
|
+ switch (code)
|
|
+ {
|
|
+ case DOT_PROD_EXPR:
|
|
+ return true;
|
|
+
|
|
+ default:
|
|
+ return false;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Insert a conditional expression to enable masked vectorization. CODE is the
|
|
+ code for the operation. VOP is the array of operands. MASK is the loop
|
|
+ mask. GSI is a statement iterator used to place the new conditional
|
|
+ expression. */
|
|
+static void
|
|
+build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
|
|
+ gimple_stmt_iterator *gsi)
|
|
+{
|
|
+ switch (code)
|
|
+ {
|
|
+ case DOT_PROD_EXPR:
|
|
+ {
|
|
+ tree vectype = TREE_TYPE (vop[1]);
|
|
+ tree zero = build_zero_cst (vectype);
|
|
+ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
|
|
+ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
|
|
+ mask, vop[1], zero);
|
|
+ gsi_insert_before (gsi, select, GSI_SAME_STMT);
|
|
+ vop[1] = masked_op1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+}
|
|
+
|
|
/* Function vectorizable_reduction.
|
|
|
|
Check if STMT_INFO performs a reduction operation that can be vectorized.
|
|
@@ -6027,182 +5795,163 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
|
|
corresponds to the type of arguments to the reduction stmt, and should *NOT*
|
|
be used to create the vectorized stmt. The right vectype for the vectorized
|
|
stmt is obtained from the type of the result X:
|
|
- get_vectype_for_scalar_type (TREE_TYPE (X))
|
|
+ get_vectype_for_scalar_type (vinfo, TREE_TYPE (X))
|
|
|
|
This means that, contrary to "regular" reductions (or "regular" stmts in
|
|
general), the following equation:
|
|
- STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
|
|
+ STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (vinfo, TREE_TYPE (X))
|
|
does *NOT* necessarily hold for reduction patterns. */
|
|
|
|
bool
|
|
-vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
- stmt_vec_info *vec_stmt, slp_tree slp_node,
|
|
+vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
|
|
slp_instance slp_node_instance,
|
|
stmt_vector_for_cost *cost_vec)
|
|
{
|
|
- tree vec_dest;
|
|
tree scalar_dest;
|
|
- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
|
|
tree vectype_in = NULL_TREE;
|
|
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
- enum tree_code code, orig_code;
|
|
- internal_fn reduc_fn;
|
|
- machine_mode vec_mode;
|
|
- int op_type;
|
|
- optab optab;
|
|
- tree new_temp = NULL_TREE;
|
|
- enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
|
|
+ enum vect_def_type cond_reduc_dt = vect_unknown_def_type;
|
|
stmt_vec_info cond_stmt_vinfo = NULL;
|
|
- enum tree_code cond_reduc_op_code = ERROR_MARK;
|
|
tree scalar_type;
|
|
- bool is_simple_use;
|
|
int i;
|
|
int ncopies;
|
|
- int epilog_copies;
|
|
- stmt_vec_info prev_stmt_info, prev_phi_info;
|
|
bool single_defuse_cycle = false;
|
|
- stmt_vec_info new_stmt_info = NULL;
|
|
- int j;
|
|
- tree ops[3];
|
|
- enum vect_def_type dts[3];
|
|
- bool nested_cycle = false, found_nested_cycle_def = false;
|
|
+ bool nested_cycle = false;
|
|
bool double_reduc = false;
|
|
- basic_block def_bb;
|
|
- struct loop * def_stmt_loop;
|
|
- tree def_arg;
|
|
- auto_vec<tree> vec_oprnds0;
|
|
- auto_vec<tree> vec_oprnds1;
|
|
- auto_vec<tree> vec_oprnds2;
|
|
- auto_vec<tree> vect_defs;
|
|
- auto_vec<stmt_vec_info> phis;
|
|
int vec_num;
|
|
- tree def0, tem;
|
|
+ tree tem;
|
|
tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
|
|
tree cond_reduc_val = NULL_TREE;
|
|
|
|
/* Make sure it was already recognized as a reduction computation. */
|
|
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
|
|
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
|
|
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
|
|
return false;
|
|
|
|
- if (nested_in_vect_loop_p (loop, stmt_info))
|
|
+ /* The stmt we store reduction analysis meta on. */
|
|
+ stmt_vec_info reduc_info = info_for_reduction (stmt_info);
|
|
+ reduc_info->is_reduc_info = true;
|
|
+
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
|
|
{
|
|
- loop = loop->inner;
|
|
- nested_cycle = true;
|
|
+ if (is_a <gphi *> (stmt_info->stmt))
|
|
+ {
|
|
+ /* Analysis for double-reduction is done on the outer
|
|
+ loop PHI, nested cycles have no further restrictions. */
|
|
+ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
|
|
+ /* For nested cycles we want to let regular vectorizable_*
|
|
+ routines handle code-generation. */
|
|
+ if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def)
|
|
+ {
|
|
+ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
|
|
+ STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
|
|
+ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info))
|
|
+ = vect_internal_def;
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
|
|
+ return true;
|
|
}
|
|
|
|
- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
- gcc_assert (slp_node
|
|
- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
|
|
-
|
|
- if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt))
|
|
+ stmt_vec_info orig_stmt_of_analysis = stmt_info;
|
|
+ stmt_vec_info phi_info = stmt_info;
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
|
|
{
|
|
- tree phi_result = gimple_phi_result (phi);
|
|
- /* Analysis is fully done on the reduction stmt invocation. */
|
|
- if (! vec_stmt)
|
|
+ if (!is_a <gphi *> (stmt_info->stmt))
|
|
{
|
|
- if (slp_node)
|
|
- slp_node_instance->reduc_phis = slp_node;
|
|
-
|
|
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
|
|
return true;
|
|
}
|
|
-
|
|
- if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
|
|
- /* Leave the scalar phi in place. Note that checking
|
|
- STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works
|
|
- for reductions involving a single statement. */
|
|
- return true;
|
|
-
|
|
- stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
|
|
- reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
|
|
-
|
|
- if (STMT_VINFO_VEC_REDUCTION_TYPE (reduc_stmt_info)
|
|
- == EXTRACT_LAST_REDUCTION)
|
|
- /* Leave the scalar phi in place. */
|
|
- return true;
|
|
-
|
|
- gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt);
|
|
- code = gimple_assign_rhs_code (reduc_stmt);
|
|
- for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
|
|
+ if (slp_node)
|
|
{
|
|
- tree op = gimple_op (reduc_stmt, k);
|
|
- if (op == phi_result)
|
|
- continue;
|
|
- if (k == 1 && code == COND_EXPR)
|
|
- continue;
|
|
- bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt);
|
|
- gcc_assert (is_simple_use);
|
|
- if (dt == vect_constant_def || dt == vect_external_def)
|
|
- continue;
|
|
- if (!vectype_in
|
|
- || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
|
|
- < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
|
|
- vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
|
|
- break;
|
|
+ slp_node_instance->reduc_phis = slp_node;
|
|
+ /* ??? We're leaving slp_node to point to the PHIs, we only
|
|
+ need it to get at the number of vector stmts which wasn't
|
|
+ yet initialized for the instance root. */
|
|
}
|
|
- /* For a nested cycle we might end up with an operation like
|
|
- phi_result * phi_result. */
|
|
- if (!vectype_in)
|
|
- vectype_in = STMT_VINFO_VECTYPE (stmt_info);
|
|
- gcc_assert (vectype_in);
|
|
-
|
|
- if (slp_node)
|
|
- ncopies = 1;
|
|
- else
|
|
- ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
|
|
-
|
|
- stmt_vec_info use_stmt_info;
|
|
- if (ncopies > 1
|
|
- && STMT_VINFO_RELEVANT (reduc_stmt_info) <= vect_used_only_live
|
|
- && (use_stmt_info = loop_vinfo->lookup_single_use (phi_result))
|
|
- && vect_stmt_to_vectorize (use_stmt_info) == reduc_stmt_info)
|
|
- single_defuse_cycle = true;
|
|
-
|
|
- /* Create the destination vector */
|
|
- scalar_dest = gimple_assign_lhs (reduc_stmt);
|
|
- vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
|
-
|
|
- if (slp_node)
|
|
- /* The size vect_schedule_slp_instance computes is off for us. */
|
|
- vec_num = vect_get_num_vectors
|
|
- (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
|
- * SLP_TREE_SCALAR_STMTS (slp_node).length (),
|
|
- vectype_in);
|
|
- else
|
|
- vec_num = 1;
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
|
|
+ stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (stmt_info));
|
|
+ else /* STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def */
|
|
+ {
|
|
+ use_operand_p use_p;
|
|
+ gimple *use_stmt;
|
|
+ bool res = single_imm_use (gimple_phi_result (stmt_info->stmt),
|
|
+ &use_p, &use_stmt);
|
|
+ gcc_assert (res);
|
|
+ phi_info = loop_vinfo->lookup_stmt (use_stmt);
|
|
+ stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
|
|
+ }
|
|
+ }
|
|
|
|
- /* Generate the reduction PHIs upfront. */
|
|
- prev_phi_info = NULL;
|
|
- for (j = 0; j < ncopies; j++)
|
|
+ /* PHIs should not participate in patterns. */
|
|
+ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
|
|
+ gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
|
|
+
|
|
+ /* Verify following REDUC_IDX from the latch def leads us back to the PHI
|
|
+ and compute the reduction chain length. */
|
|
+ tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
|
|
+ loop_latch_edge (loop));
|
|
+ unsigned reduc_chain_length = 0;
|
|
+ bool only_slp_reduc_chain = true;
|
|
+ stmt_info = NULL;
|
|
+ while (reduc_def != PHI_RESULT (reduc_def_phi))
|
|
+ {
|
|
+ stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
|
|
+ stmt_vec_info vdef = vect_stmt_to_vectorize (def);
|
|
+ if (STMT_VINFO_REDUC_IDX (vdef) == -1)
|
|
{
|
|
- if (j == 0 || !single_defuse_cycle)
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "reduction chain broken by patterns.\n");
|
|
+ return false;
|
|
+ }
|
|
+ if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
|
|
+ only_slp_reduc_chain = false;
|
|
+ /* ??? For epilogue generation live members of the chain need
|
|
+ to point back to the PHI via their original stmt for
|
|
+ info_for_reduction to work. */
|
|
+ if (STMT_VINFO_LIVE_P (vdef))
|
|
+ STMT_VINFO_REDUC_DEF (def) = phi_info;
|
|
+ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
|
|
+ {
|
|
+ if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
|
|
+ TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
|
|
{
|
|
- for (i = 0; i < vec_num; i++)
|
|
- {
|
|
- /* Create the reduction-phi that defines the reduction
|
|
- operand. */
|
|
- gimple *new_phi = create_phi_node (vec_dest, loop->header);
|
|
- stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
|
|
-
|
|
- if (slp_node)
|
|
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
|
|
- else
|
|
- {
|
|
- if (j == 0)
|
|
- STMT_VINFO_VEC_STMT (stmt_info)
|
|
- = *vec_stmt = new_phi_info;
|
|
- else
|
|
- STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
|
|
- prev_phi_info = new_phi_info;
|
|
- }
|
|
- }
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "conversion in the reduction chain.\n");
|
|
+ return false;
|
|
}
|
|
}
|
|
+ else if (!stmt_info)
|
|
+ /* First non-conversion stmt. */
|
|
+ stmt_info = vdef;
|
|
+ reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
|
|
+ reduc_chain_length++;
|
|
+ }
|
|
+ /* PHIs should not participate in patterns. */
|
|
+ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
|
|
|
|
- return true;
|
|
+ if (nested_in_vect_loop_p (loop, stmt_info))
|
|
+ {
|
|
+ loop = loop->inner;
|
|
+ nested_cycle = true;
|
|
+ }
|
|
+
|
|
+ /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
|
|
+ element. */
|
|
+ if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
+ {
|
|
+ gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
|
|
+ stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
|
|
}
|
|
+ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
+ gcc_assert (slp_node
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
|
|
|
|
/* 1. Is vectorizable reduction? */
|
|
/* Not supportable if the reduction variable is used in the loop, unless
|
|
@@ -6235,37 +5984,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
inside the loop body. The last operand is the reduction variable,
|
|
which is defined by the loop-header-phi. */
|
|
|
|
+ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
|
|
gassign *stmt = as_a <gassign *> (stmt_info->stmt);
|
|
-
|
|
- /* Flatten RHS. */
|
|
- switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
|
|
- {
|
|
- case GIMPLE_BINARY_RHS:
|
|
- code = gimple_assign_rhs_code (stmt);
|
|
- op_type = TREE_CODE_LENGTH (code);
|
|
- gcc_assert (op_type == binary_op);
|
|
- ops[0] = gimple_assign_rhs1 (stmt);
|
|
- ops[1] = gimple_assign_rhs2 (stmt);
|
|
- break;
|
|
-
|
|
- case GIMPLE_TERNARY_RHS:
|
|
- code = gimple_assign_rhs_code (stmt);
|
|
- op_type = TREE_CODE_LENGTH (code);
|
|
- gcc_assert (op_type == ternary_op);
|
|
- ops[0] = gimple_assign_rhs1 (stmt);
|
|
- ops[1] = gimple_assign_rhs2 (stmt);
|
|
- ops[2] = gimple_assign_rhs3 (stmt);
|
|
- break;
|
|
-
|
|
- case GIMPLE_UNARY_RHS:
|
|
- return false;
|
|
-
|
|
- default:
|
|
- gcc_unreachable ();
|
|
- }
|
|
-
|
|
- if (code == COND_EXPR && slp_node)
|
|
- return false;
|
|
+ enum tree_code code = gimple_assign_rhs_code (stmt);
|
|
+ bool lane_reduc_code_p
|
|
+ = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR);
|
|
+ int op_type = TREE_CODE_LENGTH (code);
|
|
|
|
scalar_dest = gimple_assign_lhs (stmt);
|
|
scalar_type = TREE_TYPE (scalar_dest);
|
|
@@ -6277,67 +6002,65 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (!type_has_mode_precision_p (scalar_type))
|
|
return false;
|
|
|
|
+ /* For lane-reducing ops we're reducing the number of reduction PHIs
|
|
+ which means the only use of that may be in the lane-reducing operation. */
|
|
+ if (lane_reduc_code_p
|
|
+ && reduc_chain_length != 1
|
|
+ && !only_slp_reduc_chain)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "lane-reducing reduction with extra stmts.\n");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
/* All uses but the last are expected to be defined in the loop.
|
|
The last use is the reduction variable. In case of nested cycle this
|
|
assumption is not true: we use reduc_index to record the index of the
|
|
reduction variable. */
|
|
- stmt_vec_info reduc_def_info;
|
|
- if (orig_stmt_info)
|
|
- reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
|
|
- else
|
|
- reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
|
|
- gcc_assert (reduc_def_info);
|
|
- gphi *reduc_def_phi = as_a <gphi *> (reduc_def_info->stmt);
|
|
- tree reduc_def = PHI_RESULT (reduc_def_phi);
|
|
- int reduc_index = -1;
|
|
+ reduc_def = PHI_RESULT (reduc_def_phi);
|
|
for (i = 0; i < op_type; i++)
|
|
{
|
|
+ tree op = gimple_op (stmt, i + 1);
|
|
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
|
|
if (i == 0 && code == COND_EXPR)
|
|
continue;
|
|
|
|
stmt_vec_info def_stmt_info;
|
|
- is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem,
|
|
- &def_stmt_info);
|
|
- dt = dts[i];
|
|
- gcc_assert (is_simple_use);
|
|
- if (dt == vect_reduction_def
|
|
- && ops[i] == reduc_def)
|
|
- {
|
|
- reduc_index = i;
|
|
- continue;
|
|
- }
|
|
- else if (tem)
|
|
+ enum vect_def_type dt;
|
|
+ if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem,
|
|
+ &def_stmt_info))
|
|
{
|
|
- /* To properly compute ncopies we are interested in the widest
|
|
- input type in case we're looking at a widening accumulation. */
- if (!vectype_in
- || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
- < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
- vectype_in = tem;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "use not simple.\n");
+ return false;
}
+ if (i == STMT_VINFO_REDUC_IDX (stmt_info))
+ continue;
- if (dt != vect_internal_def
- && dt != vect_external_def
- && dt != vect_constant_def
- && dt != vect_induction_def
- && !(dt == vect_nested_cycle && nested_cycle))
+ /* There should be only one cycle def in the stmt, the one
+ leading to reduc_def. */
+ if (VECTORIZABLE_CYCLE_DEF (dt))
return false;
- if (dt == vect_nested_cycle
- && ops[i] == reduc_def)
- {
- found_nested_cycle_def = true;
- reduc_index = i;
- }
+ /* To properly compute ncopies we are interested in the widest
+ non-reduction input type in case we're looking at a widening
+ accumulation that we later handle in vect_transform_reduction. */
+ if (lane_reduc_code_p
+ && tem
+ && (!vectype_in
+ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
+ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))))))
+ vectype_in = tem;
- if (i == 1 && code == COND_EXPR)
+ if (code == COND_EXPR)
{
- /* Record how value of COND_EXPR is defined. */
+ /* Record how the non-reduction-def value of COND_EXPR is defined. */
if (dt == vect_constant_def)
{
cond_reduc_dt = dt;
- cond_reduc_val = ops[i];
+ cond_reduc_val = op;
}
if (dt == vect_induction_def
&& def_stmt_info
@@ -6348,93 +6071,35 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
}
}
}
-
if (!vectype_in)
- vectype_in = vectype_out;
-
- /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
- directy used in stmt. */
- if (reduc_index == -1)
- {
- if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "in-order reduction chain without SLP.\n");
- return false;
- }
- }
-
- if (!(reduc_index == -1
- || dts[reduc_index] == vect_reduction_def
- || dts[reduc_index] == vect_nested_cycle
- || ((dts[reduc_index] == vect_internal_def
- || dts[reduc_index] == vect_external_def
- || dts[reduc_index] == vect_constant_def
- || dts[reduc_index] == vect_induction_def)
- && nested_cycle && found_nested_cycle_def)))
- {
- /* For pattern recognized stmts, orig_stmt might be a reduction,
- but some helper statements for the pattern might not, or
- might be COND_EXPRs with reduction uses in the condition. */
- gcc_assert (orig_stmt_info);
- return false;
- }
-
- /* PHIs should not participate in patterns. */
- gcc_assert (!STMT_VINFO_RELATED_STMT (reduc_def_info));
- enum vect_reduction_type v_reduc_type
- = STMT_VINFO_REDUC_TYPE (reduc_def_info);
- stmt_vec_info tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
+ vectype_in = STMT_VINFO_VECTYPE (phi_info);
+ STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
+ enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
+ STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type;
/* If we have a condition reduction, see if we can simplify it further. */
if (v_reduc_type == COND_REDUCTION)
{
- /* TODO: We can't yet handle reduction chains, since we need to treat
- each COND_EXPR in the chain specially, not just the last one.
- E.g. for:
-
- x_1 = PHI <x_3, ...>
- x_2 = a_2 ? ... : x_1;
- x_3 = a_3 ? ... : x_2;
+ if (slp_node)
+ return false;
- we're interested in the last element in x_3 for which a_2 || a_3
- is true, whereas the current reduction chain handling would
- vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
- as a reduction operation. */
- if (reduc_index == -1)
+ /* When the condition uses the reduction value in the condition, fail. */
+ if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "conditional reduction chains not supported\n");
+ "condition depends on previous iteration\n");
return false;
}
- /* vect_is_simple_reduction ensured that operand 2 is the
|
|
- loop-carried operand. */
|
|
- gcc_assert (reduc_index == 2);
|
|
-
|
|
- /* Loop peeling modifies initial value of reduction PHI, which
|
|
- makes the reduction stmt to be transformed different to the
|
|
- original stmt analyzed. We need to record reduction code for
|
|
- CONST_COND_REDUCTION type reduction at analyzing stage, thus
|
|
- it can be used directly at transform stage. */
|
|
- if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
|
|
- || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
|
|
- {
|
|
- /* Also set the reduction type to CONST_COND_REDUCTION. */
|
|
- gcc_assert (cond_reduc_dt == vect_constant_def);
|
|
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
|
|
- }
|
|
- else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
|
|
- vectype_in, OPTIMIZE_FOR_SPEED))
|
|
+ if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
|
|
+ vectype_in, OPTIMIZE_FOR_SPEED))
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"optimizing condition reduction with"
|
|
" FOLD_EXTRACT_LAST.\n");
|
|
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
|
|
+ STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION;
|
|
}
|
|
else if (cond_reduc_dt == vect_induction_def)
|
|
{
|
|
@@ -6445,6 +6110,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
gcc_assert (TREE_CODE (base) == INTEGER_CST
|
|
&& TREE_CODE (step) == INTEGER_CST);
|
|
cond_reduc_val = NULL_TREE;
|
|
+ enum tree_code cond_reduc_op_code = ERROR_MARK;
|
|
tree res = PHI_RESULT (STMT_VINFO_STMT (cond_stmt_vinfo));
|
|
if (!types_compatible_p (TREE_TYPE (res), TREE_TYPE (base)))
|
|
;
|
|
@@ -6477,16 +6143,17 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"condition expression based on "
|
|
"integer induction.\n");
|
|
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- = INTEGER_INDUC_COND_REDUCTION;
|
|
+ STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code;
|
|
+ STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)
|
|
+ = cond_reduc_val;
|
|
+ STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION;
|
|
}
|
|
}
|
|
else if (cond_reduc_dt == vect_constant_def)
|
|
{
|
|
enum vect_def_type cond_initial_dt;
|
|
- gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
|
|
tree cond_initial_val
|
|
- = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
|
|
+ = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_preheader_edge (loop));
|
|
|
|
gcc_assert (cond_reduc_val != NULL_TREE);
|
|
vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt);
|
|
@@ -6503,25 +6170,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
"condition expression based on "
|
|
"compile time constant.\n");
|
|
/* Record reduction code at analysis stage. */
|
|
- STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
|
|
+ STMT_VINFO_REDUC_CODE (reduc_info)
|
|
= integer_onep (e) ? MAX_EXPR : MIN_EXPR;
|
|
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
|
- = CONST_COND_REDUCTION;
|
|
+ STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
- if (orig_stmt_info)
|
|
- gcc_assert (tmp == orig_stmt_info
|
|
- || REDUC_GROUP_FIRST_ELEMENT (tmp) == orig_stmt_info);
|
|
- else
|
|
- /* We changed STMT to be the first stmt in reduction chain, hence we
|
|
- check that in this case the first element in the chain is STMT. */
|
|
- gcc_assert (tmp == stmt_info
|
|
- || REDUC_GROUP_FIRST_ELEMENT (tmp) == stmt_info);
|
|
-
|
|
- if (STMT_VINFO_LIVE_P (reduc_def_info))
|
|
+ if (STMT_VINFO_LIVE_P (phi_info))
|
|
return false;
|
|
|
|
if (slp_node)
|
|
@@ -6531,102 +6188,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
|
|
gcc_assert (ncopies >= 1);
|
|
|
|
- vec_mode = TYPE_MODE (vectype_in);
|
|
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
|
|
|
|
if (nested_cycle)
|
|
{
|
|
- def_bb = gimple_bb (reduc_def_phi);
|
|
- def_stmt_loop = def_bb->loop_father;
|
|
- def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
|
|
- loop_preheader_edge (def_stmt_loop));
|
|
- stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
|
|
- if (def_arg_stmt_info
|
|
- && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
|
|
- == vect_double_reduction_def))
|
|
- double_reduc = true;
|
|
- }
|
|
-
|
|
- vect_reduction_type reduction_type
|
|
- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
|
|
- if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
|
|
- && ncopies > 1)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "multiple types in double reduction or condition "
|
|
- "reduction.\n");
|
|
- return false;
|
|
- }
|
|
-
|
|
- if (code == COND_EXPR)
|
|
- {
|
|
- /* Only call during the analysis stage, otherwise we'll lose
|
|
- STMT_VINFO_TYPE. */
|
|
- if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
|
|
- true, NULL, cost_vec))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "unsupported condition in reduction\n");
|
|
- return false;
|
|
- }
|
|
- }
|
|
- else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
|
|
- || code == LROTATE_EXPR || code == RROTATE_EXPR)
|
|
- {
|
|
- /* Only call during the analysis stage, otherwise we'll lose
|
|
- STMT_VINFO_TYPE. We only support this for nested cycles
|
|
- without double reductions at the moment. */
|
|
- if (!nested_cycle
|
|
- || double_reduc
|
|
- || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL,
|
|
- NULL, cost_vec)))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "unsupported shift or rotation in reduction\n");
|
|
- return false;
|
|
- }
|
|
- }
|
|
- else
|
|
- {
|
|
- /* 4. Supportable by target? */
|
|
-
|
|
- /* 4.1. check support for the operation in the loop */
|
|
- optab = optab_for_tree_code (code, vectype_in, optab_default);
|
|
- if (!optab)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "no optab.\n");
|
|
-
|
|
- return false;
|
|
- }
|
|
-
|
|
- if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf (MSG_NOTE, "op not supported by target.\n");
|
|
-
|
|
- if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
|
|
- || !vect_worthwhile_without_simd_p (loop_vinfo, code))
|
|
- return false;
|
|
-
|
|
- if (dump_enabled_p ())
|
|
- dump_printf (MSG_NOTE, "proceeding using word mode.\n");
|
|
- }
|
|
-
|
|
- /* Worthwhile without SIMD support? */
|
|
- if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
|
|
- && !vect_worthwhile_without_simd_p (loop_vinfo, code))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "not worthwhile without SIMD support.\n");
|
|
-
|
|
- return false;
|
|
- }
|
|
+ gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info)
|
|
+ == vect_double_reduction_def);
|
|
+ double_reduc = true;
|
|
}
|
|
|
|
/* 4.2. Check support for the epilog operation.
|
|
@@ -6664,38 +6232,55 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
(and also the same tree-code) when generating the epilog code and
|
|
when generating the code inside the loop. */
|
|
|
|
- if (orig_stmt_info
|
|
- && (reduction_type == TREE_CODE_REDUCTION
|
|
- || reduction_type == FOLD_LEFT_REDUCTION))
|
|
- {
|
|
- /* This is a reduction pattern: get the vectype from the type of the
|
|
- reduction variable, and get the tree-code from orig_stmt. */
|
|
- orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt);
|
|
- gcc_assert (vectype_out);
|
|
- vec_mode = TYPE_MODE (vectype_out);
|
|
- }
|
|
- else
|
|
- {
|
|
- /* Regular reduction: use the same vectype and tree-code as used for
|
|
- the vector code inside the loop can be used for the epilog code. */
|
|
- orig_code = code;
|
|
-
|
|
- if (code == MINUS_EXPR)
|
|
- orig_code = PLUS_EXPR;
|
|
+ enum tree_code orig_code = STMT_VINFO_REDUC_CODE (phi_info);
|
|
+ STMT_VINFO_REDUC_CODE (reduc_info) = orig_code;
|
|
|
|
- /* For simple condition reductions, replace with the actual expression
|
|
- we want to base our reduction around. */
|
|
- if (reduction_type == CONST_COND_REDUCTION)
|
|
+ vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
|
|
+ if (reduction_type == TREE_CODE_REDUCTION)
|
|
+ {
|
|
+ /* Check whether it's ok to change the order of the computation.
|
|
+ Generally, when vectorizing a reduction we change the order of the
|
|
+ computation. This may change the behavior of the program in some
|
|
+ cases, so we need to check that this is ok. One exception is when
|
|
+ vectorizing an outer-loop: the inner-loop is executed sequentially,
|
|
+ and therefore vectorizing reductions in the inner-loop during
|
|
+ outer-loop vectorization is safe. */
|
|
+ if (needs_fold_left_reduction_p (scalar_type, orig_code))
|
|
+ {
|
|
+ /* When vectorizing a reduction chain w/o SLP the reduction PHI
|
|
+ is not directy used in stmt. */
|
|
+ if (!only_slp_reduc_chain
|
|
+ && reduc_chain_length != 1)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "in-order reduction chain without SLP.\n");
|
|
+ return false;
|
|
+ }
|
|
+ STMT_VINFO_REDUC_TYPE (reduc_info)
|
|
+ = reduction_type = FOLD_LEFT_REDUCTION;
|
|
+ }
|
|
+ else if (!commutative_tree_code (orig_code)
|
|
+ || !associative_tree_code (orig_code))
|
|
{
|
|
- orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
|
|
- gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "reduction: not commutative/associative");
|
|
+ return false;
|
|
}
|
|
- else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
|
|
- orig_code = cond_reduc_op_code;
|
|
}
|
|
|
|
- reduc_fn = IFN_LAST;
|
|
+ if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
|
|
+ && ncopies > 1)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "multiple types in double reduction or condition "
|
|
+ "reduction or fold-left reduction.\n");
|
|
+ return false;
|
|
+ }
|
|
|
|
+ internal_fn reduc_fn = IFN_LAST;
|
|
if (reduction_type == TREE_CODE_REDUCTION
|
|
|| reduction_type == FOLD_LEFT_REDUCTION
|
|
|| reduction_type == INTEGER_INDUC_COND_REDUCTION
|
|
@@ -6740,6 +6325,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
OPTIMIZE_FOR_SPEED))
|
|
reduc_fn = IFN_REDUC_MAX;
|
|
}
|
|
+ STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn;
|
|
|
|
if (reduction_type != EXTRACT_LAST_REDUCTION
|
|
&& (!nested_cycle || double_reduc)
|
|
@@ -6757,7 +6343,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
tree neutral_op = NULL_TREE;
|
|
if (slp_node)
|
|
neutral_op = neutral_op_for_slp_reduction
|
|
- (slp_node_instance->reduc_phis, code,
|
|
+ (slp_node_instance->reduc_phis, vectype_out, orig_code,
|
|
REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
|
|
|
|
if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
|
|
@@ -6822,10 +6408,11 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
which each SLP statement has its own initial value and in which
|
|
that value needs to be repeated for every instance of the
|
|
statement within the initial vector. */
|
|
- unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
|
|
+ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
|
scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
|
|
if (!neutral_op
|
|
- && !can_duplicate_and_interleave_p (group_size, elt_mode))
|
|
+ && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
|
|
+ elt_mode))
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
@@ -6848,26 +6435,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
}
|
|
|
|
- /* In case of widenning multiplication by a constant, we update the type
|
|
- of the constant to be the type of the other operand. We check that the
|
|
- constant fits the type in the pattern recognition pass. */
|
|
- if (code == DOT_PROD_EXPR
|
|
- && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
|
|
- {
|
|
- if (TREE_CODE (ops[0]) == INTEGER_CST)
|
|
- ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
|
|
- else if (TREE_CODE (ops[1]) == INTEGER_CST)
|
|
- ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
|
|
- else
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "invalid types in dot-prod\n");
|
|
-
|
|
- return false;
|
|
- }
|
|
- }
|
|
-
|
|
if (reduction_type == COND_REDUCTION)
|
|
{
|
|
widest_int ni;
|
|
@@ -6925,26 +6492,68 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
This only works when we see both the reduction PHI and its only consumer
|
|
in vectorizable_reduction and there are no intermediate stmts
|
|
participating. */
|
|
- stmt_vec_info use_stmt_info;
|
|
- tree reduc_phi_result = gimple_phi_result (reduc_def_phi);
|
|
if (ncopies > 1
|
|
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
|
|
- && (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result))
|
|
- && vect_stmt_to_vectorize (use_stmt_info) == stmt_info)
|
|
+ && reduc_chain_length == 1)
|
|
+ single_defuse_cycle = true;
|
|
+
|
|
+ if (single_defuse_cycle || lane_reduc_code_p)
|
|
{
|
|
- single_defuse_cycle = true;
|
|
- epilog_copies = 1;
|
|
+ gcc_assert (code != COND_EXPR);
|
|
+
|
|
+ /* 4. Supportable by target? */
|
|
+ bool ok = true;
|
|
+
|
|
+ /* 4.1. check support for the operation in the loop */
|
|
+ optab optab = optab_for_tree_code (code, vectype_in, optab_vector);
|
|
+ if (!optab)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "no optab.\n");
|
|
+ ok = false;
|
|
+ }
|
|
+
|
|
+ machine_mode vec_mode = TYPE_MODE (vectype_in);
|
|
+ if (ok && optab_handler (optab, vec_mode) == CODE_FOR_nothing)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf (MSG_NOTE, "op not supported by target.\n");
|
|
+ if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
|
|
+ || !vect_worthwhile_without_simd_p (loop_vinfo, code))
|
|
+ ok = false;
|
|
+ else
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf (MSG_NOTE, "proceeding using word mode.\n");
|
|
+ }
|
|
+
|
|
+ /* Worthwhile without SIMD support? */
|
|
+ if (ok
|
|
+ && !VECTOR_MODE_P (TYPE_MODE (vectype_in))
|
|
+ && !vect_worthwhile_without_simd_p (loop_vinfo, code))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "not worthwhile without SIMD support.\n");
|
|
+ ok = false;
|
|
+ }
|
|
+
|
|
+ /* lane-reducing operations have to go through vect_transform_reduction.
|
|
+ For the other cases try without the single cycle optimization. */
|
|
+ if (!ok)
|
|
+ {
|
|
+ if (lane_reduc_code_p)
|
|
+ return false;
|
|
+ else
|
|
+ single_defuse_cycle = false;
|
|
+ }
|
|
}
|
|
- else
|
|
- epilog_copies = ncopies;
|
|
+ STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle;
|
|
|
|
/* If the reduction stmt is one of the patterns that have lane
|
|
reduction embedded we cannot handle the case of ! single_defuse_cycle. */
|
|
- if ((ncopies > 1
|
|
- && ! single_defuse_cycle)
|
|
- && (code == DOT_PROD_EXPR
|
|
- || code == WIDEN_SUM_EXPR
|
|
- || code == SAD_EXPR))
|
|
+ if ((ncopies > 1 && ! single_defuse_cycle)
|
|
+ && lane_reduc_code_p)
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
@@ -6958,46 +6567,130 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
else
|
|
vec_num = 1;
|
|
|
|
+ vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
|
|
+ cost_vec);
|
|
+ if (dump_enabled_p ()
|
|
+ && reduction_type == FOLD_LEFT_REDUCTION)
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "using an in-order (fold-left) reduction.\n");
|
|
+ STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type;
|
|
+ /* All but single defuse-cycle optimized, lane-reducing and fold-left
|
|
+ reductions go through their own vectorizable_* routines. */
|
|
+ if (!single_defuse_cycle
|
|
+ && code != DOT_PROD_EXPR
|
|
+ && code != WIDEN_SUM_EXPR
|
|
+ && code != SAD_EXPR
|
|
+ && reduction_type != FOLD_LEFT_REDUCTION)
|
|
+ {
|
|
+ stmt_vec_info tem
|
|
+ = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
|
|
+ if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
|
|
+ {
|
|
+ gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
|
|
+ tem = REDUC_GROUP_FIRST_ELEMENT (tem);
|
|
+ }
|
|
+ STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
|
|
+ STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
|
|
+ }
|
|
+ else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
|
|
+ {
|
|
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
|
|
+ internal_fn cond_fn = get_conditional_internal_fn (code);
|
|
+
|
|
+ if (reduction_type != FOLD_LEFT_REDUCTION
|
|
+ && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in)
|
|
+ && (cond_fn == IFN_LAST
|
|
+ || !direct_internal_fn_supported_p (cond_fn, vectype_in,
|
|
+ OPTIMIZE_FOR_SPEED)))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "can't use a fully-masked loop because no"
|
|
+ " conditional operation is available.\n");
|
|
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
|
+ }
|
|
+ else
|
|
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
|
|
+ vectype_in, NULL);
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Transform the definition stmt STMT_INFO of a reduction PHI backedge
|
|
+ value. */
|
|
+
|
|
+bool
|
|
+vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
+ stmt_vec_info *vec_stmt, slp_tree slp_node)
|
|
+{
|
|
+ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
+ int i;
|
|
+ int ncopies;
|
|
+ int j;
|
|
+ int vec_num;
|
|
+
|
|
+ stmt_vec_info reduc_info = info_for_reduction (stmt_info);
|
|
+ gcc_assert (reduc_info->is_reduc_info);
|
|
+
|
|
+ if (nested_in_vect_loop_p (loop, stmt_info))
|
|
+ {
|
|
+ loop = loop->inner;
|
|
+ gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def);
|
|
+ }
|
|
+
|
|
+ gassign *stmt = as_a <gassign *> (stmt_info->stmt);
|
|
+ enum tree_code code = gimple_assign_rhs_code (stmt);
|
|
+ int op_type = TREE_CODE_LENGTH (code);
|
|
+
|
|
+ /* Flatten RHS. */
|
|
+ tree ops[3];
|
|
+ switch (get_gimple_rhs_class (code))
|
|
+ {
|
|
+ case GIMPLE_TERNARY_RHS:
|
|
+ ops[2] = gimple_assign_rhs3 (stmt);
|
|
+ /* Fall thru. */
|
|
+ case GIMPLE_BINARY_RHS:
|
|
+ ops[0] = gimple_assign_rhs1 (stmt);
|
|
+ ops[1] = gimple_assign_rhs2 (stmt);
|
|
+ break;
|
|
+ default:
|
|
+ gcc_unreachable ();
|
|
+ }
|
|
+
|
|
+ /* All uses but the last are expected to be defined in the loop.
|
|
+ The last use is the reduction variable. In case of nested cycle this
|
|
+ assumption is not true: we use reduc_index to record the index of the
|
|
+ reduction variable. */
|
|
+ stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
|
|
+ gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
|
|
+ int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
|
|
+ tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
|
|
+
|
|
+ if (slp_node)
|
|
+ {
|
|
+ ncopies = 1;
|
|
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
|
|
+ vec_num = 1;
|
|
+ }
|
|
+
|
|
internal_fn cond_fn = get_conditional_internal_fn (code);
|
|
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
|
|
-
|
|
- if (!vec_stmt) /* transformation not required. */
|
|
- {
|
|
- vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
|
|
- if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
|
|
- {
|
|
- if (reduction_type != FOLD_LEFT_REDUCTION
|
|
- && (cond_fn == IFN_LAST
|
|
- || !direct_internal_fn_supported_p (cond_fn, vectype_in,
|
|
- OPTIMIZE_FOR_SPEED)))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "can't use a fully-masked loop because no"
|
|
- " conditional operation is available.\n");
|
|
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
|
- }
|
|
- else if (reduc_index == -1)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "can't use a fully-masked loop for chained"
|
|
- " reductions.\n");
|
|
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
|
- }
|
|
- else
|
|
- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
|
|
- vectype_in);
|
|
- }
|
|
- if (dump_enabled_p ()
|
|
- && reduction_type == FOLD_LEFT_REDUCTION)
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "using an in-order (fold-left) reduction.\n");
|
|
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
|
|
- return true;
|
|
- }
|
|
+ bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
|
|
|
|
/* Transform. */
|
|
+ stmt_vec_info new_stmt_info = NULL;
|
|
+ stmt_vec_info prev_stmt_info;
|
|
+ tree new_temp = NULL_TREE;
|
|
+ auto_vec<tree> vec_oprnds0;
|
|
+ auto_vec<tree> vec_oprnds1;
|
|
+ auto_vec<tree> vec_oprnds2;
|
|
+ tree def0;
|
|
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
|
|
@@ -7008,23 +6701,26 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
|
|
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
|
|
|
|
+ vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
|
|
if (reduction_type == FOLD_LEFT_REDUCTION)
|
|
- return vectorize_fold_left_reduction
|
|
- (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
|
|
- reduc_fn, ops, vectype_in, reduc_index, masks);
|
|
-
|
|
- if (reduction_type == EXTRACT_LAST_REDUCTION)
|
|
{
|
|
- gcc_assert (!slp_node);
|
|
- return vectorizable_condition (stmt_info, gsi, vec_stmt,
|
|
- true, NULL, NULL);
|
|
+ internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
|
|
+ return vectorize_fold_left_reduction
|
|
+ (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
|
|
+ reduc_fn, ops, vectype_in, reduc_index, masks);
|
|
}
|
|
|
|
+ bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
|
|
+ gcc_assert (single_defuse_cycle
|
|
+ || code == DOT_PROD_EXPR
|
|
+ || code == WIDEN_SUM_EXPR
|
|
+ || code == SAD_EXPR);
|
|
+
|
|
/* Create the destination vector */
|
|
- vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
|
+ tree scalar_dest = gimple_assign_lhs (stmt);
|
|
+ tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
|
|
|
prev_stmt_info = NULL;
|
|
- prev_phi_info = NULL;
|
|
if (!slp_node)
|
|
{
|
|
vec_oprnds0.create (1);
|
|
@@ -7033,32 +6729,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
vec_oprnds2.create (1);
|
|
}
|
|
|
|
- phis.create (vec_num);
|
|
- vect_defs.create (vec_num);
|
|
- if (!slp_node)
|
|
- vect_defs.quick_push (NULL_TREE);
|
|
-
|
|
- if (slp_node)
|
|
- phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis));
|
|
- else
|
|
- phis.quick_push (STMT_VINFO_VEC_STMT (reduc_def_info));
|
|
-
|
|
for (j = 0; j < ncopies; j++)
|
|
{
|
|
- if (code == COND_EXPR)
|
|
- {
|
|
- gcc_assert (!slp_node);
|
|
- vectorizable_condition (stmt_info, gsi, vec_stmt,
|
|
- true, NULL, NULL);
|
|
- break;
|
|
- }
|
|
- if (code == LSHIFT_EXPR
|
|
- || code == RSHIFT_EXPR)
|
|
- {
|
|
- vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL);
|
|
- break;
|
|
- }
|
|
-
|
|
/* Handle uses. */
|
|
if (j == 0)
|
|
{
|
|
@@ -7066,16 +6738,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
{
|
|
/* Get vec defs for all the operands except the reduction index,
|
|
ensuring the ordering of the ops in the vector is kept. */
|
|
- auto_vec<tree, 3> slp_ops;
|
|
auto_vec<vec<tree>, 3> vec_defs;
|
|
-
|
|
- slp_ops.quick_push (ops[0]);
|
|
- slp_ops.quick_push (ops[1]);
|
|
- if (op_type == ternary_op)
|
|
- slp_ops.quick_push (ops[2]);
|
|
-
|
|
- vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
|
|
-
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds0.safe_splice (vec_defs[0]);
|
|
vec_defs[0].release ();
|
|
vec_oprnds1.safe_splice (vec_defs[1]);
|
|
@@ -7130,7 +6794,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
|
|
{
|
|
tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
|
|
- if (masked_loop_p)
|
|
+ if (masked_loop_p && !mask_by_cond_expr)
|
|
{
|
|
/* Make sure that the reduction accumulator is vop[0]. */
|
|
if (reduc_index == 1)
|
|
@@ -7154,6 +6818,14 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (op_type == ternary_op)
|
|
vop[2] = vec_oprnds2[i];
|
|
|
|
+ if (masked_loop_p && mask_by_cond_expr)
|
|
+ {
|
|
+ tree mask = vect_get_loop_mask (gsi, masks,
|
|
+ vec_num * ncopies,
|
|
+ vectype_in, i * ncopies + j);
|
|
+ build_vect_cond_expr (code, vop, mask, gsi);
|
|
+ }
|
|
+
|
|
gassign *new_stmt = gimple_build_assign (vec_dest, code,
|
|
vop[0], vop[1], vop[2]);
|
|
new_temp = make_ssa_name (vec_dest, new_stmt);
|
|
@@ -7163,15 +6835,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
|
|
if (slp_node)
|
|
- {
|
|
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
|
|
- vect_defs.quick_push (new_temp);
|
|
- }
|
|
- else
|
|
- vect_defs[0] = new_temp;
|
|
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
|
|
}
|
|
|
|
- if (slp_node)
|
|
+ if (slp_node || single_defuse_cycle)
|
|
continue;
|
|
|
|
if (j == 0)
|
|
@@ -7182,20 +6849,244 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
prev_stmt_info = new_stmt_info;
|
|
}
|
|
|
|
- /* Finalize the reduction-phi (set its arguments) and create the
|
|
- epilog reduction code. */
|
|
- if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
|
|
- vect_defs[0] = gimple_get_lhs ((*vec_stmt)->stmt);
|
|
+ if (single_defuse_cycle && !slp_node)
|
|
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Transform phase of a cycle PHI. */
|
|
+
|
|
+bool
|
|
+vect_transform_cycle_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
|
|
+ slp_tree slp_node, slp_instance slp_node_instance)
|
|
+{
|
|
+ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
+ int i;
|
|
+ int ncopies;
|
|
+ stmt_vec_info prev_phi_info;
|
|
+ int j;
|
|
+ bool nested_cycle = false;
|
|
+ int vec_num;
|
|
+
|
|
+ if (nested_in_vect_loop_p (loop, stmt_info))
|
|
+ {
|
|
+ loop = loop->inner;
|
|
+ nested_cycle = true;
|
|
+ }
|
|
+
|
|
+ stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
|
|
+ reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
|
|
+ stmt_vec_info reduc_info = info_for_reduction (stmt_info);
|
|
+ gcc_assert (reduc_info->is_reduc_info);
|
|
+
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
|
|
+ || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)
|
|
+ /* Leave the scalar phi in place. */
|
|
+ return true;
|
|
+
|
|
+ tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
|
|
+ /* For a nested cycle we do not fill the above. */
|
|
+ if (!vectype_in)
|
|
+ vectype_in = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ gcc_assert (vectype_in);
|
|
+
|
|
+ if (slp_node)
|
|
+ {
|
|
+ /* The size vect_schedule_slp_instance computes is off for us. */
|
|
+ vec_num = vect_get_num_vectors
|
|
+ (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
|
+ * SLP_TREE_SCALAR_STMTS (slp_node).length (), vectype_in);
|
|
+ ncopies = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ vec_num = 1;
|
|
+ ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
|
|
+ }
|
|
+
|
|
+ /* Check whether we should use a single PHI node and accumulate
|
|
+ vectors to one before the backedge. */
|
|
+ if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
|
|
+ ncopies = 1;
|
|
+
|
|
+ /* Create the destination vector */
|
|
+ gphi *phi = as_a <gphi *> (stmt_info->stmt);
|
|
+ tree vec_dest = vect_create_destination_var (gimple_phi_result (phi),
|
|
+ vectype_out);
|
|
+
|
|
+ /* Get the loop-entry arguments. */
|
|
+ tree vec_initial_def;
|
|
+ auto_vec<tree> vec_initial_defs;
|
|
+ if (slp_node)
|
|
+ {
|
|
+ vec_initial_defs.reserve (vec_num);
|
|
+ gcc_assert (slp_node == slp_node_instance->reduc_phis);
|
|
+ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
|
|
+ tree neutral_op
|
|
+ = neutral_op_for_slp_reduction (slp_node, vectype_out,
|
|
+ STMT_VINFO_REDUC_CODE (reduc_info),
|
|
+ first != NULL);
|
|
+ get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
|
|
+ &vec_initial_defs, vec_num,
|
|
+ first != NULL, neutral_op);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ /* Get at the scalar def before the loop, that defines the initial
|
|
+ value of the reduction variable. */
|
|
+ tree initial_def = PHI_ARG_DEF_FROM_EDGE (phi,
|
|
+ loop_preheader_edge (loop));
|
|
+ /* Optimize: if initial_def is for REDUC_MAX smaller than the base
|
|
+ and we can't use zero for induc_val, use initial_def. Similarly
|
|
+ for REDUC_MIN and initial_def larger than the base. */
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
|
|
+ {
|
|
+ tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
|
|
+ if (TREE_CODE (initial_def) == INTEGER_CST
|
|
+ && !integer_zerop (induc_val)
|
|
+ && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
|
|
+ && tree_int_cst_lt (initial_def, induc_val))
|
|
+ || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
|
|
+ && tree_int_cst_lt (induc_val, initial_def))))
|
|
+ {
|
|
+ induc_val = initial_def;
|
|
+ /* Communicate we used the initial_def to epilouge
|
|
+ generation. */
|
|
+ STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
|
|
+ }
|
|
+ vec_initial_def = build_vector_from_val (vectype_out, induc_val);
|
|
+ }
|
|
+ else if (nested_cycle)
|
|
+ {
|
|
+ /* Do not use an adjustment def as that case is not supported
|
|
+ correctly if ncopies is not one. */
|
|
+ vec_initial_def = vect_get_vec_def_for_operand (initial_def,
|
|
+ reduc_stmt_info);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ tree adjustment_def = NULL_TREE;
|
|
+ tree *adjustment_defp = &adjustment_def;
|
|
+ enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
|
|
+ adjustment_defp = NULL;
|
|
+ vec_initial_def
|
|
+ = get_initial_def_for_reduction (reduc_stmt_info, code,
|
|
+ initial_def, adjustment_defp);
|
|
+ STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def;
|
|
+ }
|
|
+ vec_initial_defs.create (1);
|
|
+ vec_initial_defs.quick_push (vec_initial_def);
|
|
+ }
|
|
+
|
|
+ /* Generate the reduction PHIs upfront. */
|
|
+ prev_phi_info = NULL;
|
|
+ for (i = 0; i < vec_num; i++)
|
|
+ {
|
|
+ tree vec_init_def = vec_initial_defs[i];
|
|
+ for (j = 0; j < ncopies; j++)
|
|
+ {
|
|
+ /* Create the reduction-phi that defines the reduction
|
|
+ operand. */
|
|
+ gphi *new_phi = create_phi_node (vec_dest, loop->header);
|
|
+ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
|
|
+
|
|
+ /* Set the loop-entry arg of the reduction-phi. */
|
|
+ if (j != 0 && nested_cycle)
|
|
+ vec_init_def = vect_get_vec_def_for_stmt_copy (loop_vinfo,
|
|
+ vec_init_def);
|
|
+ add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
|
|
+ UNKNOWN_LOCATION);
|
|
+
|
|
+ /* The loop-latch arg is set in epilogue processing. */
|
|
+
|
|
+ if (slp_node)
|
|
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
|
|
+ else
|
|
+ {
|
|
+ if (j == 0)
|
|
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
|
|
+ else
|
|
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
|
|
+ prev_phi_info = new_phi_info;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Vectorizes LC PHIs. */
|
|
+
|
|
+bool
|
|
+vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
|
|
+ slp_tree slp_node)
|
|
+{
|
|
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
+ if (!loop_vinfo
|
|
+ || !is_a <gphi *> (stmt_info->stmt)
|
|
+ || gimple_phi_num_args (stmt_info->stmt) != 1)
|
|
+ return false;
|
|
+
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
|
|
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
|
|
+ return false;
|
|
+
|
|
+ if (!vec_stmt) /* transformation not required. */
|
|
+ {
|
|
+ STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
|
|
+ return true;
|
|
+ }
|
|
|
|
- vect_create_epilog_for_reduction (vect_defs, stmt_info, reduc_def_phi,
|
|
- epilog_copies, reduc_fn, phis,
|
|
- double_reduc, slp_node, slp_node_instance,
|
|
- cond_reduc_val, cond_reduc_op_code,
|
|
- neutral_op);
|
|
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ tree scalar_dest = gimple_phi_result (stmt_info->stmt);
|
|
+ basic_block bb = gimple_bb (stmt_info->stmt);
|
|
+ edge e = single_pred_edge (bb);
|
|
+ tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
|
+ vec<tree> vec_oprnds = vNULL;
|
|
+ vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE,
|
|
+ stmt_info, &vec_oprnds, NULL, slp_node);
|
|
+ if (slp_node)
|
|
+ {
|
|
+ unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
|
+ gcc_assert (vec_oprnds.length () == vec_num);
|
|
+ for (unsigned i = 0; i < vec_num; i++)
|
|
+ {
|
|
+ /* Create the vectorized LC PHI node. */
|
|
+ gphi *new_phi = create_phi_node (vec_dest, bb);
|
|
+ add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
|
|
+ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
|
|
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype);
|
|
+ stmt_vec_info prev_phi_info = NULL;
|
|
+ for (unsigned i = 0; i < ncopies; i++)
|
|
+ {
|
|
+ if (i != 0)
|
|
+ vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL);
|
|
+ /* Create the vectorized LC PHI node. */
|
|
+ gphi *new_phi = create_phi_node (vec_dest, bb);
|
|
+ add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION);
|
|
+ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
|
|
+ if (i == 0)
|
|
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
|
|
+ else
|
|
+ STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
|
|
+ prev_phi_info = new_phi_info;
|
|
+ }
|
|
+ }
|
|
+ vec_oprnds.release ();
|
|
|
|
return true;
|
|
}
|
|
|
|
+
|
|
/* Function vect_min_worthwhile_factor.
|
|
|
|
For a loop where we could vectorize the operation indicated by CODE,
|
|
@@ -7789,8 +7680,8 @@ vectorizable_induction (stmt_vec_info stmt_info,
|
|
bool
|
|
vectorizable_live_operation (stmt_vec_info stmt_info,
|
|
gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
|
|
- slp_tree slp_node, int slp_index,
|
|
- stmt_vec_info *vec_stmt,
|
|
+ slp_tree slp_node, slp_instance slp_node_instance,
|
|
+ int slp_index, stmt_vec_info *vec_stmt,
|
|
stmt_vector_for_cost *)
|
|
{
|
|
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
|
@@ -7807,8 +7698,33 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
|
|
|
|
gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
|
|
|
|
- if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
|
|
- return false;
|
|
+ /* If a stmt of a reduction is live, vectorize it via
|
|
+ vect_create_epilog_for_reduction. vectorizable_reduction assessed
|
|
+ validity so just trigger the transform here. */
|
|
+ if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
|
|
+ {
|
|
+ if (!vec_stmt)
|
|
+ return true;
|
|
+ if (slp_node)
|
|
+ {
|
|
+ /* For reduction chains the meta-info is attached to
|
|
+ the group leader. */
|
|
+ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
+ stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
|
|
+ /* For SLP reductions we vectorize the epilogue for
|
|
+ all involved stmts together. */
|
|
+ else if (slp_index != 0)
|
|
+ return true;
|
|
+ }
|
|
+ stmt_vec_info reduc_info = info_for_reduction (stmt_info);
|
|
+ gcc_assert (reduc_info->is_reduc_info);
|
|
+ if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
|
|
+ || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
|
|
+ return true;
|
|
+ vect_create_epilog_for_reduction (stmt_info, slp_node,
|
|
+ slp_node_instance);
|
|
+ return true;
|
|
+ }
|
|
|
|
/* FORNOW. CHECKME. */
|
|
if (nested_in_vect_loop_p (loop, stmt_info))
|
|
@@ -7892,7 +7808,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
|
|
gcc_assert (ncopies == 1 && !slp_node);
|
|
vect_record_loop_mask (loop_vinfo,
|
|
&LOOP_VINFO_MASKS (loop_vinfo),
|
|
- 1, vectype);
|
|
+ 1, vectype, NULL);
|
|
}
|
|
}
|
|
return true;
|
|
@@ -8071,31 +7987,34 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
|
|
return false;
|
|
}
|
|
|
|
-/* Return a mask type with half the number of elements as TYPE. */
|
|
+/* Return a mask type with half the number of elements as OLD_TYPE,
|
|
+ given that it should have mode NEW_MODE. */
|
|
|
|
tree
|
|
-vect_halve_mask_nunits (tree type)
|
|
+vect_halve_mask_nunits (tree old_type, machine_mode new_mode)
|
|
{
|
|
- poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2);
|
|
- return build_truth_vector_type (nunits, current_vector_size);
|
|
+ poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (old_type), 2);
|
|
+ return build_truth_vector_type_for_mode (nunits, new_mode);
|
|
}
|
|
|
|
-/* Return a mask type with twice as many elements as TYPE. */
|
|
+/* Return a mask type with twice as many elements as OLD_TYPE,
|
|
+ given that it should have mode NEW_MODE. */
|
|
|
|
tree
|
|
-vect_double_mask_nunits (tree type)
|
|
+vect_double_mask_nunits (tree old_type, machine_mode new_mode)
|
|
{
|
|
- poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2;
|
|
- return build_truth_vector_type (nunits, current_vector_size);
|
|
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (old_type) * 2;
|
|
+ return build_truth_vector_type_for_mode (nunits, new_mode);
|
|
}
|
|
|
|
/* Record that a fully-masked version of LOOP_VINFO would need MASKS to
|
|
contain a sequence of NVECTORS masks that each control a vector of type
|
|
- VECTYPE. */
|
|
+ VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND
|
|
+ these vector masks with the vector version of SCALAR_MASK. */
|
|
|
|
void
|
|
vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
|
|
- unsigned int nvectors, tree vectype)
|
|
+ unsigned int nvectors, tree vectype, tree scalar_mask)
|
|
{
|
|
gcc_assert (nvectors != 0);
|
|
if (masks->length () < nvectors)
|
|
@@ -8106,10 +8025,17 @@ vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
|
|
unsigned int nscalars_per_iter
|
|
= exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
|
|
LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
|
|
+
|
|
+ if (scalar_mask)
|
|
+ {
|
|
+ scalar_cond_masked_key cond (scalar_mask, nvectors);
|
|
+ loop_vinfo->scalar_cond_masked_set.add (cond);
|
|
+ }
|
|
+
|
|
if (rgm->max_nscalars_per_iter < nscalars_per_iter)
|
|
{
|
|
rgm->max_nscalars_per_iter = nscalars_per_iter;
|
|
- rgm->mask_type = build_same_sized_truth_vector_type (vectype);
|
|
+ rgm->mask_type = truth_type_for (vectype);
|
|
}
|
|
}
|
|
|
|
@@ -8154,7 +8080,7 @@ vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
|
|
gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type),
|
|
TYPE_VECTOR_SUBPARTS (vectype)));
|
|
gimple_seq seq = NULL;
|
|
- mask_type = build_same_sized_truth_vector_type (vectype);
|
|
+ mask_type = truth_type_for (vectype);
|
|
mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask);
|
|
if (seq)
|
|
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
|
|
@@ -8242,6 +8168,186 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
|
|
*seen_store = stmt_info;
|
|
}
|
|
|
|
+/* Helper function to pass to simplify_replace_tree to enable replacing tree's
|
|
+ in the hash_map with its corresponding values. */
|
|
+
|
|
+static tree
|
|
+find_in_mapping (tree t, void *context)
|
|
+{
|
|
+ hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context;
|
|
+
|
|
+ tree *value = mapping->get (t);
|
|
+ return value ? *value : t;
|
|
+}
|
|
+
|
|
+/* Update EPILOGUE's loop_vec_info. EPILOGUE was constructed as a copy of the
|
|
+ original loop that has now been vectorized.
|
|
+
|
|
+ The inits of the data_references need to be advanced with the number of
|
|
+ iterations of the main loop. This has been computed in vect_do_peeling and
|
|
+ is stored in parameter ADVANCE. We first restore the data_references
|
|
+ initial offset with the values recored in ORIG_DRS_INIT.
|
|
+
|
|
+ Since the loop_vec_info of this EPILOGUE was constructed for the original
|
|
+ loop, its stmt_vec_infos all point to the original statements. These need
|
|
+ to be updated to point to their corresponding copies as well as the SSA_NAMES
|
|
+ in their PATTERN_DEF_SEQs and RELATED_STMTs.
|
|
+
|
|
+ The data_reference's connections also need to be updated. Their
|
|
+ corresponding dr_vec_info need to be reconnected to the EPILOGUE's
|
|
+ stmt_vec_infos, their statements need to point to their corresponding copy,
|
|
+ if they are gather loads or scatter stores then their reference needs to be
|
|
+ updated to point to its corresponding copy and finally we set
|
|
+ 'base_misaligned' to false as we have already peeled for alignment in the
|
|
+ prologue of the main loop. */
|
|
+
|
|
+static void
|
|
+update_epilogue_loop_vinfo (class loop *epilogue, tree advance,
|
|
+ drs_init_vec &orig_drs_init)
|
|
+{
|
|
+ loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
|
|
+ auto_vec<gimple *> stmt_worklist;
|
|
+ hash_map<tree,tree> mapping;
|
|
+ gimple *orig_stmt, *new_stmt;
|
|
+ gimple_stmt_iterator epilogue_gsi;
|
|
+ gphi_iterator epilogue_phi_gsi;
|
|
+ stmt_vec_info stmt_vinfo = NULL, related_vinfo;
|
|
+ basic_block *epilogue_bbs = get_loop_body (epilogue);
|
|
+
|
|
+ LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
|
|
+
|
|
+ /* Restore original data_reference's offset, before the previous loop and its
|
|
+ prologue. */
|
|
+ std::pair<data_reference*, tree> *dr_init;
|
|
+ unsigned i;
|
|
+ for (i = 0; orig_drs_init.iterate (i, &dr_init); i++)
|
|
+ DR_OFFSET (dr_init->first) = dr_init->second;
|
|
+
|
|
+ /* Advance data_reference's with the number of iterations of the previous
|
|
+ loop and its prologue. */
|
|
+ vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);
|
|
+
|
|
+
|
|
+ /* The EPILOGUE loop is a copy of the original loop so they share the same
|
|
+ gimple UIDs. In this loop we update the loop_vec_info of the EPILOGUE to
|
|
+ point to the copied statements. We also create a mapping of all LHS' in
|
|
+ the original loop and all the LHS' in the EPILOGUE and create worklists to
|
|
+ update teh STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs. */
|
|
+ for (unsigned i = 0; i < epilogue->num_nodes; ++i)
|
|
+ {
|
|
+ for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
|
|
+ !gsi_end_p (epilogue_phi_gsi); gsi_next (&epilogue_phi_gsi))
|
|
+ {
|
|
+ new_stmt = epilogue_phi_gsi.phi ();
|
|
+
|
|
+ gcc_assert (gimple_uid (new_stmt) > 0);
|
|
+ stmt_vinfo
|
|
+ = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
|
|
+
|
|
+ orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
|
|
+ STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
|
|
+
|
|
+ mapping.put (gimple_phi_result (orig_stmt),
|
|
+ gimple_phi_result (new_stmt));
|
|
+ /* PHI nodes can not have patterns or related statements. */
|
|
+ gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL
|
|
+ && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL);
|
|
+ }
|
|
+
|
|
+ for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]);
|
|
+ !gsi_end_p (epilogue_gsi); gsi_next (&epilogue_gsi))
|
|
+ {
|
|
+ new_stmt = gsi_stmt (epilogue_gsi);
|
|
+
|
|
+ gcc_assert (gimple_uid (new_stmt) > 0);
|
|
+ stmt_vinfo
|
|
+ = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
|
|
+
|
|
+ orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
|
|
+ STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
|
|
+
|
|
+ if (tree old_lhs = gimple_get_lhs (orig_stmt))
|
|
+ mapping.put (old_lhs, gimple_get_lhs (new_stmt));
|
|
+
|
|
+ if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
|
|
+ {
|
|
+ gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
|
|
+ for (gimple_stmt_iterator gsi = gsi_start (seq);
|
|
+ !gsi_end_p (gsi); gsi_next (&gsi))
|
|
+ stmt_worklist.safe_push (gsi_stmt (gsi));
|
|
+ }
|
|
+
|
|
+ related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
|
|
+ if (related_vinfo != NULL && related_vinfo != stmt_vinfo)
|
|
+ {
|
|
+ gimple *stmt = STMT_VINFO_STMT (related_vinfo);
|
|
+ stmt_worklist.safe_push (stmt);
|
|
+ /* Set BB such that the assert in
|
|
+ 'get_initial_def_for_reduction' is able to determine that
|
|
+ the BB of the related stmt is inside this loop. */
|
|
+ gimple_set_bb (stmt,
|
|
+ gimple_bb (new_stmt));
|
|
+ related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo);
|
|
+ gcc_assert (related_vinfo == NULL
|
|
+ || related_vinfo == stmt_vinfo);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed
|
|
+ using the original main loop and thus need to be updated to refer to the
|
|
+ cloned variables used in the epilogue. */
|
|
+ for (unsigned i = 0; i < stmt_worklist.length (); ++i)
|
|
+ {
|
|
+ gimple *stmt = stmt_worklist[i];
|
|
+ tree *new_op;
|
|
+
|
|
+ for (unsigned j = 1; j < gimple_num_ops (stmt); ++j)
|
|
+ {
|
|
+ tree op = gimple_op (stmt, j);
|
|
+ if ((new_op = mapping.get(op)))
|
|
+ gimple_set_op (stmt, j, *new_op);
|
|
+ else
|
|
+ {
|
|
+ op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
|
|
+ &find_in_mapping, &mapping);
|
|
+ gimple_set_op (stmt, j, op);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ struct data_reference *dr;
|
|
+ vec<data_reference_p> datarefs = epilogue_vinfo->shared->datarefs;
|
|
+ FOR_EACH_VEC_ELT (datarefs, i, dr)
|
|
+ {
|
|
+ orig_stmt = DR_STMT (dr);
|
|
+ gcc_assert (gimple_uid (orig_stmt) > 0);
|
|
+ stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (orig_stmt) - 1];
|
|
+ /* Data references for gather loads and scatter stores do not use the
|
|
+ updated offset we set using ADVANCE. Instead we have to make sure the
|
|
+ reference in the data references point to the corresponding copy of
|
|
+ the original in the epilogue. */
|
|
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
|
|
+ {
|
|
+ DR_REF (dr)
|
|
+ = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
|
|
+ &find_in_mapping, &mapping);
|
|
+ DR_BASE_ADDRESS (dr)
|
|
+ = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
|
|
+ &find_in_mapping, &mapping);
|
|
+ }
|
|
+ DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
|
|
+ stmt_vinfo->dr_aux.stmt = stmt_vinfo;
|
|
+ /* The vector size of the epilogue is smaller than that of the main loop
|
|
+ so the alignment is either the same or lower. This means the dr will
|
|
+ thus by definition be aligned. */
|
|
+ STMT_VINFO_DR_INFO (stmt_vinfo)->base_misaligned = false;
|
|
+ }
|
|
+
|
|
+ epilogue_vinfo->shared->datarefs_copy.release ();
|
|
+ epilogue_vinfo->shared->save_datarefs ();
|
|
+}
|
|
+
|
|
/* Function vect_transform_loop.
|
|
|
|
The analysis phase has determined that the loop is vectorizable.
|
|
@@ -8279,11 +8385,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
if (th >= vect_vf_for_cost (loop_vinfo)
|
|
&& !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
|
{
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "Profitability threshold is %d loop iterations.\n",
|
|
- th);
|
|
- check_profitability = true;
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "Profitability threshold is %d loop iterations.\n",
|
|
+ th);
|
|
+ check_profitability = true;
|
|
}
|
|
|
|
/* Make sure there exists a single-predecessor exit bb. Do this before
|
|
@@ -8301,18 +8407,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
|
|
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
|
|
{
|
|
- poly_uint64 versioning_threshold
|
|
- = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
|
|
- if (check_profitability
|
|
- && ordered_p (poly_uint64 (th), versioning_threshold))
|
|
- {
|
|
- versioning_threshold = ordered_max (poly_uint64 (th),
|
|
- versioning_threshold);
|
|
- check_profitability = false;
|
|
- }
|
|
struct loop *sloop
|
|
- = vect_loop_versioning (loop_vinfo, th, check_profitability,
|
|
- versioning_threshold);
|
|
+ = vect_loop_versioning (loop_vinfo);
|
|
sloop->force_vectorize = false;
|
|
check_profitability = false;
|
|
}
|
|
@@ -8337,9 +8433,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
|
|
tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
|
|
bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
|
|
+ tree advance;
|
|
+ drs_init_vec orig_drs_init;
|
|
+
|
|
epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
|
|
&step_vector, &niters_vector_mult_vf, th,
|
|
- check_profitability, niters_no_overflow);
|
|
+ check_profitability, niters_no_overflow,
|
|
+ &advance, orig_drs_init);
|
|
|
|
if (niters_vector == NULL_TREE)
|
|
{
|
|
@@ -8413,7 +8513,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
|
|
if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
|
|
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|
|
- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
|
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
|
|
&& ! PURE_SLP_STMT (stmt_info))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -8565,12 +8667,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
dump_printf (MSG_NOTE, "\n");
|
|
}
|
|
else
|
|
- {
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "LOOP EPILOGUE VECTORIZED (VS=");
|
|
- dump_dec (MSG_NOTE, current_vector_size);
|
|
- dump_printf (MSG_NOTE, ")\n");
|
|
- }
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "LOOP EPILOGUE VECTORIZED (MODE=%s)\n",
|
|
+ GET_MODE_NAME (loop_vinfo->vector_mode));
|
|
}
|
|
|
|
/* Loops vectorized with a variable factor won't benefit from
|
|
@@ -8592,57 +8691,14 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|
since vectorized loop can have loop-carried dependencies. */
|
|
loop->safelen = 0;
|
|
|
|
- /* Don't vectorize epilogue for epilogue. */
|
|
- if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
|
|
- epilogue = NULL;
|
|
-
|
|
- if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
|
|
- epilogue = NULL;
|
|
-
|
|
if (epilogue)
|
|
{
|
|
- auto_vector_sizes vector_sizes;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
|
- unsigned int next_size = 0;
|
|
-
|
|
- /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
|
|
- on niters already ajusted for the iterations of the prologue. */
|
|
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
|
- && known_eq (vf, lowest_vf))
|
|
- {
|
|
- unsigned HOST_WIDE_INT eiters
|
|
- = (LOOP_VINFO_INT_NITERS (loop_vinfo)
|
|
- - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
|
|
- eiters
|
|
- = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
|
|
- epilogue->nb_iterations_upper_bound = eiters - 1;
|
|
- epilogue->any_upper_bound = true;
|
|
-
|
|
- unsigned int ratio;
|
|
- while (next_size < vector_sizes.length ()
|
|
- && !(constant_multiple_p (current_vector_size,
|
|
- vector_sizes[next_size], &ratio)
|
|
- && eiters >= lowest_vf / ratio))
|
|
- next_size += 1;
|
|
- }
|
|
- else
|
|
- while (next_size < vector_sizes.length ()
|
|
- && maybe_lt (current_vector_size, vector_sizes[next_size]))
|
|
- next_size += 1;
|
|
-
|
|
- if (next_size == vector_sizes.length ())
|
|
- epilogue = NULL;
|
|
- }
|
|
+ update_epilogue_loop_vinfo (epilogue, advance, orig_drs_init);
|
|
|
|
- if (epilogue)
|
|
- {
|
|
+ epilogue->simduid = loop->simduid;
|
|
epilogue->force_vectorize = loop->force_vectorize;
|
|
epilogue->safelen = loop->safelen;
|
|
epilogue->dont_vectorize = false;
|
|
-
|
|
- /* We may need to if-convert epilogue to vectorize it. */
|
|
- if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
|
|
- tree_if_conversion (epilogue);
|
|
}
|
|
|
|
return epilogue;
|
|
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
|
index badf4e7104e..6356ecd692f 100644
|
|
--- a/gcc/tree-vect-patterns.c
|
|
+++ b/gcc/tree-vect-patterns.c
|
|
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "cgraph.h"
|
|
#include "omp-simd-clone.h"
|
|
#include "predict.h"
|
|
+#include "tree-vector-builder.h"
|
|
+#include "vec-perm-indices.h"
|
|
|
|
/* Return true if we have a useful VR_RANGE range for VAR, storing it
|
|
in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
|
|
@@ -185,15 +187,15 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
|
|
is nonnull. */
|
|
|
|
static bool
|
|
-vect_supportable_direct_optab_p (tree otype, tree_code code,
|
|
+vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
|
|
tree itype, tree *vecotype_out,
|
|
tree *vecitype_out = NULL)
|
|
{
|
|
- tree vecitype = get_vectype_for_scalar_type (itype);
|
|
+ tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
|
|
if (!vecitype)
|
|
return false;
|
|
|
|
- tree vecotype = get_vectype_for_scalar_type (otype);
|
|
+ tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
|
|
if (!vecotype)
|
|
return false;
|
|
|
|
@@ -632,6 +634,7 @@ static bool
|
|
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
|
|
gimple *stmt1, tree vectype)
|
|
{
|
|
+ vec_info *vinfo = stmt2_info->vinfo;
|
|
if (is_pattern_stmt_p (stmt2_info))
|
|
{
|
|
/* STMT2_INFO is part of a pattern. Get the statement to which
|
|
@@ -675,7 +678,7 @@ vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
|
|
two-statement pattern now. */
|
|
gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
|
|
tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
|
|
- tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
|
|
+ tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
|
|
if (!lhs_vectype)
|
|
return false;
|
|
|
|
@@ -712,6 +715,8 @@ static tree
|
|
vect_convert_input (stmt_vec_info stmt_info, tree type,
|
|
vect_unpromoted_value *unprom, tree vectype)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
+
|
|
/* Check for a no-op conversion. */
|
|
if (types_compatible_p (type, TREE_TYPE (unprom->op)))
|
|
return unprom->op;
|
|
@@ -749,7 +754,7 @@ vect_convert_input (stmt_vec_info stmt_info, tree type,
|
|
unsigned promotion. */
|
|
tree midtype = build_nonstandard_integer_type
|
|
(TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
|
|
- tree vec_midtype = get_vectype_for_scalar_type (midtype);
|
|
+ tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
|
|
if (vec_midtype)
|
|
{
|
|
input = vect_recog_temp_ssa_var (midtype, NULL);
|
|
@@ -830,17 +835,8 @@ vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
|
|
/* Return true if STMT_VINFO describes a reduction for which reassociation
|
|
is allowed. If STMT_INFO is part of a group, assume that it's part of
|
|
a reduction chain and optimistically assume that all statements
|
|
- except the last allow reassociation. */
|
|
-
|
|
-static bool
|
|
-vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
|
|
-{
|
|
- return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
|
|
- ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
|
|
- : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
|
|
-}
|
|
-
|
|
-/* As above, but also require it to have code CODE and to be a reduction
|
|
+ except the last allow reassociation.
|
|
+ Also require it to have code CODE and to be a reduction
|
|
in the outermost loop. When returning true, store the operands in
|
|
*OP0_OUT and *OP1_OUT. */
|
|
|
|
@@ -862,11 +858,19 @@ vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
|
|
if (loop && nested_in_vect_loop_p (loop, stmt_info))
|
|
return false;
|
|
|
|
- if (!vect_reassociating_reduction_p (stmt_info))
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
|
|
+ {
|
|
+ if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
|
|
+ code))
|
|
+ return false;
|
|
+ }
|
|
+ else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
|
|
return false;
|
|
|
|
*op0_out = gimple_assign_rhs1 (assign);
|
|
*op1_out = gimple_assign_rhs2 (assign);
|
|
+ if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
|
|
+ std::swap (*op0_out, *op1_out);
|
|
return true;
|
|
}
|
|
|
|
@@ -983,7 +987,7 @@ vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
|
|
|
|
tree half_vectype;
|
|
- if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
|
|
+ if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
|
|
type_out, &half_vectype))
|
|
return NULL;
|
|
|
|
@@ -1141,7 +1145,7 @@ vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
|
|
|
|
tree half_vectype;
|
|
- if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
|
|
+ if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
|
|
type_out, &half_vectype))
|
|
return NULL;
|
|
|
|
@@ -1187,6 +1191,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
|
|
tree_code orig_code, tree_code wide_code,
|
|
bool shift_p, const char *name)
|
|
{
|
|
+ vec_info *vinfo = last_stmt_info->vinfo;
|
|
gimple *last_stmt = last_stmt_info->stmt;
|
|
|
|
vect_unpromoted_value unprom[2];
|
|
@@ -1206,8 +1211,8 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
|
|
TYPE_UNSIGNED (half_type));
|
|
|
|
/* Check target support */
|
|
- tree vectype = get_vectype_for_scalar_type (half_type);
|
|
- tree vecitype = get_vectype_for_scalar_type (itype);
|
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
|
|
+ tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
|
|
enum tree_code dummy_code;
|
|
int dummy_int;
|
|
auto_vec<tree> dummy_vec;
|
|
@@ -1219,7 +1224,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
|
|
&dummy_int, &dummy_vec))
|
|
return NULL;
|
|
|
|
- *type_out = get_vectype_for_scalar_type (type);
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, type);
|
|
if (!*type_out)
|
|
return NULL;
|
|
|
|
@@ -1271,6 +1276,7 @@ vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
static gimple *
|
|
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
gimple *last_stmt = stmt_vinfo->stmt;
|
|
tree base, exp;
|
|
gimple *stmt;
|
|
@@ -1339,7 +1345,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
if (node->simd_clones == NULL)
|
|
return NULL;
|
|
}
|
|
- *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
|
|
if (!*type_out)
|
|
return NULL;
|
|
tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
|
|
@@ -1364,7 +1370,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|| (TREE_CODE (exp) == REAL_CST
|
|
&& real_equal (&TREE_REAL_CST (exp), &dconst2)))
|
|
{
|
|
- if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
|
|
+ if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
|
|
TREE_TYPE (base), type_out))
|
|
return NULL;
|
|
|
|
@@ -1377,7 +1383,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
if (TREE_CODE (exp) == REAL_CST
|
|
&& real_equal (&TREE_REAL_CST (exp), &dconsthalf))
|
|
{
|
|
- *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
|
|
if (*type_out
|
|
&& direct_internal_fn_supported_p (IFN_SQRT, *type_out,
|
|
OPTIMIZE_FOR_SPEED))
|
|
@@ -1470,8 +1476,8 @@ vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|
|
vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
|
|
|
|
- if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
|
|
- type_out))
|
|
+ if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
|
|
+ unprom0.type, type_out))
|
|
return NULL;
|
|
|
|
var = vect_recog_temp_ssa_var (type, NULL);
|
|
@@ -1662,7 +1668,7 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
|
|
vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
|
|
|
|
- *type_out = get_vectype_for_scalar_type (type);
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, type);
|
|
if (!*type_out)
|
|
return NULL;
|
|
|
|
@@ -1683,8 +1689,8 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
wants to rewrite anyway. If targets have a minimum element size
|
|
for some optabs, we should pattern-match smaller ops to larger ops
|
|
where beneficial. */
|
|
- tree new_vectype = get_vectype_for_scalar_type (new_type);
|
|
- tree op_vectype = get_vectype_for_scalar_type (op_type);
|
|
+ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
|
|
+ tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
|
|
if (!new_vectype || !op_vectype)
|
|
return NULL;
|
|
|
|
@@ -1842,7 +1848,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
TYPE_UNSIGNED (new_type));
|
|
|
|
/* Check for target support. */
|
|
- tree new_vectype = get_vectype_for_scalar_type (new_type);
|
|
+ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
|
|
if (!new_vectype
|
|
|| !direct_internal_fn_supported_p (ifn, new_vectype,
|
|
OPTIMIZE_FOR_SPEED))
|
|
@@ -1850,7 +1856,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
|
|
/* The IR requires a valid vector type for the cast result, even though
|
|
it's likely to be discarded. */
|
|
- *type_out = get_vectype_for_scalar_type (type);
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, type);
|
|
if (!*type_out)
|
|
return NULL;
|
|
|
|
@@ -1936,7 +1942,7 @@ vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
|
|
the unnecessary widening and narrowing. */
|
|
vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
|
|
|
|
- *type_out = get_vectype_for_scalar_type (lhs_type);
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
|
|
if (!*type_out)
|
|
return NULL;
|
|
|
|
@@ -1996,24 +2002,107 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
enum vect_def_type dt;
|
|
optab optab1, optab2;
|
|
edge ext_def = NULL;
|
|
+ bool bswap16_p = false;
|
|
|
|
- if (!is_gimple_assign (last_stmt))
|
|
- return NULL;
|
|
+ if (is_gimple_assign (last_stmt))
|
|
+ {
|
|
+ rhs_code = gimple_assign_rhs_code (last_stmt);
|
|
+ switch (rhs_code)
|
|
+ {
|
|
+ case LROTATE_EXPR:
|
|
+ case RROTATE_EXPR:
|
|
+ break;
|
|
+ default:
|
|
+ return NULL;
|
|
+ }
|
|
|
|
- rhs_code = gimple_assign_rhs_code (last_stmt);
|
|
- switch (rhs_code)
|
|
+ lhs = gimple_assign_lhs (last_stmt);
|
|
+ oprnd0 = gimple_assign_rhs1 (last_stmt);
|
|
+ type = TREE_TYPE (oprnd0);
|
|
+ oprnd1 = gimple_assign_rhs2 (last_stmt);
|
|
+ }
|
|
+ else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
|
|
{
|
|
- case LROTATE_EXPR:
|
|
- case RROTATE_EXPR:
|
|
- break;
|
|
- default:
|
|
- return NULL;
|
|
+ /* __builtin_bswap16 (x) is another form of x r>> 8.
|
|
+ The vectorizer has bswap support, but only if the argument isn't
|
|
+ promoted. */
|
|
+ lhs = gimple_call_lhs (last_stmt);
|
|
+ oprnd0 = gimple_call_arg (last_stmt, 0);
|
|
+ type = TREE_TYPE (oprnd0);
|
|
+ if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16
|
|
+ || TYPE_PRECISION (type) <= 16
|
|
+ || TREE_CODE (oprnd0) != SSA_NAME
|
|
+ || BITS_PER_UNIT != 8
|
|
+ || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
|
|
+ return NULL;
|
|
+
|
|
+ stmt_vec_info def_stmt_info;
|
|
+ if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
|
|
+ return NULL;
|
|
+
|
|
+ if (dt != vect_internal_def)
|
|
+ return NULL;
|
|
+
|
|
+ if (gimple_assign_cast_p (def_stmt))
|
|
+ {
|
|
+ def = gimple_assign_rhs1 (def_stmt);
|
|
+ if (INTEGRAL_TYPE_P (TREE_TYPE (def))
|
|
+ && TYPE_PRECISION (TREE_TYPE (def)) == 16)
|
|
+ oprnd0 = def;
|
|
+ }
|
|
+
|
|
+ type = TREE_TYPE (lhs);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, type);
|
|
+ if (vectype == NULL_TREE)
|
|
+ return NULL;
|
|
+
|
|
+ if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
|
|
+ {
|
|
+ /* The encoding uses one stepped pattern for each byte in the
|
|
+ 16-bit word. */
|
|
+ vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
|
|
+ for (unsigned i = 0; i < 3; ++i)
|
|
+ for (unsigned j = 0; j < 2; ++j)
|
|
+ elts.quick_push ((i + 1) * 2 - j - 1);
|
|
+
|
|
+ vec_perm_indices indices (elts, 1,
|
|
+ TYPE_VECTOR_SUBPARTS (char_vectype));
|
|
+ if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
|
|
+ {
|
|
+ /* vectorizable_bswap can handle the __builtin_bswap16 if we
|
|
+ undo the argument promotion. */
|
|
+ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
|
|
+ {
|
|
+ def = vect_recog_temp_ssa_var (type, NULL);
|
|
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
|
|
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
|
|
+ oprnd0 = def;
|
|
+ }
|
|
+
|
|
+ /* Pattern detected. */
|
|
+ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
|
|
+
|
|
+ *type_out = vectype;
|
|
+
|
|
+ /* Pattern supported. Create a stmt to be used to replace the
|
|
+ pattern, with the unpromoted argument. */
|
|
+ var = vect_recog_temp_ssa_var (type, NULL);
|
|
+ pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
|
|
+ 1, oprnd0);
|
|
+ gimple_call_set_lhs (pattern_stmt, var);
|
|
+ gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
|
|
+ gimple_call_fntype (last_stmt));
|
|
+ return pattern_stmt;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ oprnd1 = build_int_cst (integer_type_node, 8);
|
|
+ rhs_code = LROTATE_EXPR;
|
|
+ bswap16_p = true;
|
|
}
|
|
+ else
|
|
+ return NULL;
|
|
|
|
- lhs = gimple_assign_lhs (last_stmt);
|
|
- oprnd0 = gimple_assign_rhs1 (last_stmt);
|
|
- type = TREE_TYPE (oprnd0);
|
|
- oprnd1 = gimple_assign_rhs2 (last_stmt);
|
|
if (TREE_CODE (oprnd0) != SSA_NAME
|
|
|| TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
|
|
|| !INTEGRAL_TYPE_P (type)
|
|
@@ -2029,7 +2118,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
&& dt != vect_external_def)
|
|
return NULL;
|
|
|
|
- vectype = get_vectype_for_scalar_type (type);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, type);
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -2038,14 +2127,39 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
|
|
if (optab1
|
|
&& optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
|
|
- return NULL;
|
|
+ {
|
|
+ use_rotate:
|
|
+ if (bswap16_p)
|
|
+ {
|
|
+ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
|
|
+ {
|
|
+ def = vect_recog_temp_ssa_var (type, NULL);
|
|
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
|
|
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
|
|
+ oprnd0 = def;
|
|
+ }
|
|
+
|
|
+ /* Pattern detected. */
|
|
+ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
|
|
+
|
|
+ *type_out = vectype;
|
|
+
|
|
+ /* Pattern supported. Create a stmt to be used to replace the
|
|
+ pattern. */
|
|
+ var = vect_recog_temp_ssa_var (type, NULL);
|
|
+ pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
|
|
+ oprnd1);
|
|
+ return pattern_stmt;
|
|
+ }
|
|
+ return NULL;
|
|
+ }
|
|
|
|
if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
|
|
{
|
|
optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
|
|
if (optab2
|
|
&& optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
|
|
- return NULL;
|
|
+ goto use_rotate;
|
|
}
|
|
|
|
/* If vector/vector or vector/scalar shifts aren't supported by the target,
|
|
@@ -2070,6 +2184,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|
|
*type_out = vectype;
|
|
|
|
+ if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
|
|
+ {
|
|
+ def = vect_recog_temp_ssa_var (type, NULL);
|
|
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
|
|
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
|
|
+ oprnd0 = def;
|
|
+ }
|
|
+
|
|
if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
|
|
ext_def = vect_get_external_def_edge (vinfo, oprnd1);
|
|
|
|
@@ -2106,7 +2228,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
}
|
|
else
|
|
{
|
|
- tree vecstype = get_vectype_for_scalar_type (stype);
|
|
+ tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
|
|
|
|
if (vecstype == NULL_TREE)
|
|
return NULL;
|
|
@@ -2235,7 +2357,7 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
|
|
if (!def_vinfo)
|
|
return NULL;
|
|
|
|
- *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
|
|
+ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
|
|
if (*type_out == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -2258,7 +2380,8 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
|
|
TYPE_PRECISION (TREE_TYPE (oprnd1)));
|
|
def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
|
|
def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
|
|
- tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ tree vecstype = get_vectype_for_scalar_type (vinfo,
|
|
+ TREE_TYPE (rhs1));
|
|
append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
|
|
}
|
|
}
|
|
@@ -2423,6 +2546,7 @@ static gimple *
|
|
vect_synth_mult_by_constant (tree op, tree val,
|
|
stmt_vec_info stmt_vinfo)
|
|
{
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
tree itype = TREE_TYPE (op);
|
|
machine_mode mode = TYPE_MODE (itype);
|
|
struct algorithm alg;
|
|
@@ -2441,7 +2565,7 @@ vect_synth_mult_by_constant (tree op, tree val,
|
|
|
|
/* Targets that don't support vector shifts but support vector additions
|
|
can synthesize shifts that way. */
|
|
- bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
|
|
+ bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
|
|
|
|
HOST_WIDE_INT hwval = tree_to_shwi (val);
|
|
/* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
|
|
@@ -2452,7 +2576,7 @@ vect_synth_mult_by_constant (tree op, tree val,
|
|
if (!possible)
|
|
return NULL;
|
|
|
|
- tree vectype = get_vectype_for_scalar_type (multtype);
|
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
|
|
|
|
if (!vectype
|
|
|| !target_supports_mult_synth_alg (&alg, variant,
|
|
@@ -2598,6 +2722,7 @@ vect_synth_mult_by_constant (tree op, tree val,
|
|
static gimple *
|
|
vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
gimple *last_stmt = stmt_vinfo->stmt;
|
|
tree oprnd0, oprnd1, vectype, itype;
|
|
gimple *pattern_stmt;
|
|
@@ -2618,7 +2743,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|| !type_has_mode_precision_p (itype))
|
|
return NULL;
|
|
|
|
- vectype = get_vectype_for_scalar_type (itype);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, itype);
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -2686,6 +2811,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
static gimple *
|
|
vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
gimple *last_stmt = stmt_vinfo->stmt;
|
|
tree oprnd0, oprnd1, vectype, itype, cond;
|
|
gimple *pattern_stmt, *def_stmt;
|
|
@@ -2718,7 +2844,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
return NULL;
|
|
|
|
scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
|
|
- vectype = get_vectype_for_scalar_type (itype);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, itype);
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -2785,7 +2911,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
tree utype
|
|
= build_nonstandard_integer_type (prec, 1);
|
|
- tree vecutype = get_vectype_for_scalar_type (utype);
|
|
+ tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
|
|
tree shift
|
|
= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
|
|
- tree_log2 (oprnd1));
|
|
@@ -3104,6 +3230,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
static gimple *
|
|
vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
gimple *last_stmt = stmt_vinfo->stmt;
|
|
tree cond_expr, then_clause, else_clause;
|
|
tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
|
|
@@ -3126,7 +3253,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
return NULL;
|
|
|
|
comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
|
|
- comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
|
|
+ comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
|
|
if (comp_vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -3174,7 +3301,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
|
|
return NULL;
|
|
|
|
- vectype = get_vectype_for_scalar_type (type);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, type);
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -3189,7 +3316,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|| GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
|
|
return NULL;
|
|
|
|
- vecitype = get_vectype_for_scalar_type (itype);
|
|
+ vecitype = get_vectype_for_scalar_type (vinfo, itype);
|
|
if (vecitype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -3283,11 +3410,12 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
|
|
if (stmt_could_throw_p (cfun, def_stmt))
|
|
return false;
|
|
|
|
- comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
|
|
if (comp_vectype == NULL_TREE)
|
|
return false;
|
|
|
|
- tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ tree mask_type = get_mask_type_for_scalar_type (vinfo,
|
|
+ TREE_TYPE (rhs1));
|
|
if (mask_type
|
|
&& expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
|
|
return false;
|
|
@@ -3297,7 +3425,7 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
|
|
scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
|
|
tree itype
|
|
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
|
|
- vecitype = get_vectype_for_scalar_type (itype);
|
|
+ vecitype = get_vectype_for_scalar_type (vinfo, itype);
|
|
if (vecitype == NULL_TREE)
|
|
return false;
|
|
}
|
|
@@ -3326,10 +3454,11 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
|
|
static tree
|
|
adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
|
|
NOP_EXPR, var);
|
|
append_pattern_def_seq (stmt_info, cast_stmt,
|
|
- get_vectype_for_scalar_type (type));
|
|
+ get_vectype_for_scalar_type (vinfo, type));
|
|
return gimple_assign_lhs (cast_stmt);
|
|
}
|
|
|
|
@@ -3343,6 +3472,7 @@ static void
|
|
adjust_bool_pattern (tree var, tree out_type,
|
|
stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple *stmt = SSA_NAME_DEF_STMT (var);
|
|
enum tree_code rhs_code, def_rhs_code;
|
|
tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
|
|
@@ -3504,7 +3634,7 @@ adjust_bool_pattern (tree var, tree out_type,
|
|
|
|
gimple_set_location (pattern_stmt, loc);
|
|
append_pattern_def_seq (stmt_info, pattern_stmt,
|
|
- get_vectype_for_scalar_type (itype));
|
|
+ get_vectype_for_scalar_type (vinfo, itype));
|
|
defs.put (var, gimple_assign_lhs (pattern_stmt));
|
|
}
|
|
|
|
@@ -3607,14 +3737,14 @@ search_type_for_mask_1 (tree var, vec_info *vinfo,
|
|
break;
|
|
}
|
|
|
|
- comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
|
|
if (comp_vectype == NULL_TREE)
|
|
{
|
|
res = NULL_TREE;
|
|
break;
|
|
}
|
|
|
|
- mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1));
|
|
if (!mask_type
|
|
|| !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
|
|
{
|
|
@@ -3722,7 +3852,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
|| TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
|
|
return NULL;
|
|
- vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -3759,7 +3889,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|
|
if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
|
|
{
|
|
- tree new_vectype = get_vectype_for_scalar_type (type);
|
|
+ tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
|
|
append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
|
|
|
|
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
|
|
@@ -3775,7 +3905,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
else if (rhs_code == COND_EXPR
|
|
&& TREE_CODE (var) == SSA_NAME)
|
|
{
|
|
- vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
|
|
if (vectype == NULL_TREE)
|
|
return NULL;
|
|
|
|
@@ -3789,7 +3919,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
tree type
|
|
= build_nonstandard_integer_type (prec,
|
|
TYPE_UNSIGNED (TREE_TYPE (var)));
|
|
- if (get_vectype_for_scalar_type (type) == NULL_TREE)
|
|
+ if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
|
|
return NULL;
|
|
|
|
if (!check_bool_pattern (var, vinfo, bool_stmts))
|
|
@@ -3833,7 +3963,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|
|
cst0 = build_int_cst (type, 0);
|
|
cst1 = build_int_cst (type, 1);
|
|
- new_vectype = get_vectype_for_scalar_type (type);
|
|
+ new_vectype = get_vectype_for_scalar_type (vinfo, type);
|
|
|
|
rhs = vect_recog_temp_ssa_var (type, NULL);
|
|
pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
|
|
@@ -3874,7 +4004,7 @@ build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
|
|
gimple *stmt;
|
|
tree masktype, tmp;
|
|
|
|
- masktype = build_same_sized_truth_vector_type (vectype);
|
|
+ masktype = truth_type_for (vectype);
|
|
tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
|
|
stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
|
|
append_pattern_def_seq (stmt_vinfo, stmt, masktype);
|
|
@@ -3934,19 +4064,19 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
{
|
|
int rhs_index = internal_fn_stored_value_index (ifn);
|
|
tree rhs = gimple_call_arg (last_stmt, rhs_index);
|
|
- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
|
|
+ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
|
|
}
|
|
else
|
|
{
|
|
lhs = gimple_call_lhs (last_stmt);
|
|
- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
|
|
+ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
|
|
}
|
|
|
|
tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
|
|
tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
|
|
if (!mask_arg_type)
|
|
return NULL;
|
|
- vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
|
|
+ vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
|
|
|
|
if (!vectype1 || !vectype2
|
|
|| known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
|
|
@@ -3992,7 +4122,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
/* Check for cond expression requiring mask conversion. */
|
|
if (rhs_code == COND_EXPR)
|
|
{
|
|
- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
|
|
+ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
|
|
|
|
if (TREE_CODE (rhs1) == SSA_NAME)
|
|
{
|
|
@@ -4023,7 +4153,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
else
|
|
return NULL;
|
|
|
|
- vectype2 = get_mask_type_for_scalar_type (rhs1_type);
|
|
+ vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
|
|
|
|
if (!vectype1 || !vectype2)
|
|
return NULL;
|
|
@@ -4058,7 +4188,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
tree wide_scalar_type = build_nonstandard_integer_type
|
|
(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
|
|
TYPE_UNSIGNED (rhs1_type));
|
|
- tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
|
|
+ tree vectype3 = get_vectype_for_scalar_type (vinfo,
|
|
+ wide_scalar_type);
|
|
if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
|
|
return NULL;
|
|
}
|
|
@@ -4113,14 +4244,14 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
|
|
|
if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
|
|
{
|
|
- vectype1 = get_mask_type_for_scalar_type (rhs1_type);
|
|
+ vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
|
|
if (!vectype1)
|
|
return NULL;
|
|
rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
|
|
}
|
|
else
|
|
{
|
|
- vectype1 = get_mask_type_for_scalar_type (rhs2_type);
|
|
+ vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
|
|
if (!vectype1)
|
|
return NULL;
|
|
rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
|
|
@@ -4191,7 +4322,7 @@ vect_convert_mask_for_vectype (tree mask, tree vectype,
|
|
tree mask_type = search_type_for_mask (mask, vinfo);
|
|
if (mask_type)
|
|
{
|
|
- tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
|
|
+ tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
|
|
if (mask_vectype
|
|
&& maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
|
|
TYPE_VECTOR_SUBPARTS (mask_vectype)))
|
|
@@ -4214,10 +4345,11 @@ vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
|
|
if (useless_type_conversion_p (type, TREE_TYPE (value)))
|
|
return value;
|
|
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
tree new_value = vect_recog_temp_ssa_var (type, NULL);
|
|
gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
|
|
append_pattern_def_seq (stmt_info, conversion,
|
|
- get_vectype_for_scalar_type (type));
|
|
+ get_vectype_for_scalar_type (vinfo, type));
|
|
return new_value;
|
|
}
|
|
|
|
@@ -4253,7 +4385,8 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
|
|
return NULL;
|
|
|
|
/* Convert the mask to the right form. */
|
|
- tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
|
|
+ tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
|
|
+ gs_info.element_type);
|
|
if (mask)
|
|
mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
|
|
loop_vinfo);
|
|
@@ -4731,6 +4864,7 @@ static inline void
|
|
vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
|
|
tree pattern_vectype)
|
|
{
|
|
+ stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
|
|
gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
|
|
|
|
gimple *orig_pattern_stmt = NULL;
|
|
@@ -4765,6 +4899,9 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
|
|
for (gimple_stmt_iterator si = gsi_start (def_seq);
|
|
!gsi_end_p (si); gsi_next (&si))
|
|
{
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "extra pattern stmt: %G", gsi_stmt (si));
|
|
stmt_vec_info pattern_stmt_info
|
|
= vect_init_pattern_stmt (gsi_stmt (si),
|
|
orig_stmt_info, pattern_vectype);
|
|
@@ -4790,6 +4927,60 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
|
|
}
|
|
else
|
|
vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
|
|
+
|
|
+ /* Transfer reduction path info to the pattern. */
|
|
+ if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
|
|
+ {
|
|
+ vec_info *vinfo = orig_stmt_info_saved->vinfo;
|
|
+ tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
|
|
+ 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
|
|
+ /* Search the pattern def sequence and the main pattern stmt. Note
|
|
+ we may have inserted all into a containing pattern def sequence
|
|
+ so the following is a bit awkward. */
|
|
+ gimple_stmt_iterator si;
|
|
+ gimple *s;
|
|
+ if (def_seq)
|
|
+ {
|
|
+ si = gsi_start (def_seq);
|
|
+ s = gsi_stmt (si);
|
|
+ gsi_next (&si);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ si = gsi_none ();
|
|
+ s = pattern_stmt;
|
|
+ }
|
|
+ do
|
|
+ {
|
|
+ bool found = false;
|
|
+ for (unsigned i = 1; i < gimple_num_ops (s); ++i)
|
|
+ if (gimple_op (s, i) == lookfor)
|
|
+ {
|
|
+ STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
|
|
+ lookfor = gimple_get_lhs (s);
|
|
+ found = true;
|
|
+ break;
|
|
+ }
|
|
+ if (s == pattern_stmt)
|
|
+ {
|
|
+ if (!found && dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "failed to update reduction index.\n");
|
|
+ break;
|
|
+ }
|
|
+ if (gsi_end_p (si))
|
|
+ s = pattern_stmt;
|
|
+ else
|
|
+ {
|
|
+ s = gsi_stmt (si);
|
|
+ if (s == pattern_stmt)
|
|
+ /* Found the end inside a bigger pattern def seq. */
|
|
+ si = gsi_none ();
|
|
+ else
|
|
+ gsi_next (&si);
|
|
+ }
|
|
+ } while (1);
|
|
+ }
|
|
}
|
|
|
|
/* Function vect_pattern_recog_1
|
|
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
|
index 2abf480270c..0bef35782b5 100644
|
|
--- a/gcc/tree-vect-slp.c
|
|
+++ b/gcc/tree-vect-slp.c
|
|
@@ -79,6 +79,7 @@ vect_free_slp_tree (slp_tree node, bool final_p)
|
|
|
|
SLP_TREE_CHILDREN (node).release ();
|
|
SLP_TREE_SCALAR_STMTS (node).release ();
|
|
+ SLP_TREE_SCALAR_OPS (node).release ();
|
|
SLP_TREE_VEC_STMTS (node).release ();
|
|
SLP_TREE_LOAD_PERMUTATION (node).release ();
|
|
|
|
@@ -122,6 +123,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
|
|
|
node = XNEW (struct _slp_tree);
|
|
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
|
|
+ SLP_TREE_SCALAR_OPS (node) = vNULL;
|
|
SLP_TREE_VEC_STMTS (node).create (0);
|
|
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
|
SLP_TREE_CHILDREN (node).create (nops);
|
|
@@ -138,6 +140,28 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
|
return node;
|
|
}
|
|
|
|
+/* Create an SLP node for OPS. */
|
|
+
|
|
+static slp_tree
|
|
+vect_create_new_slp_node (vec<tree> ops)
|
|
+{
|
|
+ slp_tree node;
|
|
+
|
|
+ node = XNEW (struct _slp_tree);
|
|
+ SLP_TREE_SCALAR_STMTS (node) = vNULL;
|
|
+ SLP_TREE_SCALAR_OPS (node) = ops;
|
|
+ SLP_TREE_VEC_STMTS (node).create (0);
|
|
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
|
+ SLP_TREE_CHILDREN (node) = vNULL;
|
|
+ SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
|
+ SLP_TREE_TWO_OPERATORS (node) = false;
|
|
+ SLP_TREE_DEF_TYPE (node) = vect_external_def;
|
|
+ node->refcnt = 1;
|
|
+ node->max_nunits = 1;
|
|
+
|
|
+ return node;
|
|
+}
|
|
+
|
|
|
|
/* This structure is used in creation of an SLP tree. Each instance
|
|
corresponds to the same operand in a group of scalar stmts in an SLP
|
|
@@ -146,6 +170,8 @@ typedef struct _slp_oprnd_info
|
|
{
|
|
/* Def-stmts for the operands. */
|
|
vec<stmt_vec_info> def_stmts;
|
|
+ /* Operands. */
|
|
+ vec<tree> ops;
|
|
/* Information about the first statement, its vector def-type, type, the
|
|
operand itself in case it's constant, and an indication if it's a pattern
|
|
stmt. */
|
|
@@ -169,6 +195,7 @@ vect_create_oprnd_info (int nops, int group_size)
|
|
{
|
|
oprnd_info = XNEW (struct _slp_oprnd_info);
|
|
oprnd_info->def_stmts.create (group_size);
|
|
+ oprnd_info->ops.create (group_size);
|
|
oprnd_info->first_dt = vect_uninitialized_def;
|
|
oprnd_info->first_op_type = NULL_TREE;
|
|
oprnd_info->any_pattern = false;
|
|
@@ -190,6 +217,7 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
|
|
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
|
|
{
|
|
oprnd_info->def_stmts.release ();
|
|
+ oprnd_info->ops.release ();
|
|
XDELETE (oprnd_info);
|
|
}
|
|
|
|
@@ -197,6 +225,19 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
|
|
}
|
|
|
|
|
|
+/* Return true if STMTS contains a pattern statement. */
|
|
+
|
|
+static bool
|
|
+vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
|
|
+{
|
|
+ stmt_vec_info stmt_info;
|
|
+ unsigned int i;
|
|
+ FOR_EACH_VEC_ELT (stmts, i, stmt_info)
|
|
+ if (is_pattern_stmt_p (stmt_info))
|
|
+ return true;
|
|
+ return false;
|
|
+}
|
|
+
|
|
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
|
|
that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
|
|
of the chain. */
|
|
@@ -231,7 +272,8 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
|
|
(if nonnull). */
|
|
|
|
bool
|
|
-can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
|
|
+can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
|
|
+ machine_mode elt_mode,
|
|
unsigned int *nvectors_out,
|
|
tree *vector_type_out,
|
|
tree *permutes)
|
|
@@ -243,7 +285,7 @@ can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
|
|
{
|
|
scalar_int_mode int_mode;
|
|
poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
|
|
- if (multiple_p (current_vector_size, elt_bytes, &nelts)
|
|
+ if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
|
|
&& int_mode_for_size (elt_bits, 0).exists (&int_mode))
|
|
{
|
|
tree int_type = build_nonstandard_integer_type
|
|
@@ -322,6 +364,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
|
|
{
|
|
internal_fn ifn = gimple_call_internal_fn (stmt);
|
|
commutative_op = first_commutative_argument (ifn);
|
|
+
|
|
+ /* Masked load, only look at mask. */
|
|
+ if (ifn == IFN_MASK_LOAD)
|
|
+ {
|
|
+ number_of_oprnds = 1;
|
|
+ /* Mask operand index. */
|
|
+ first_op_idx = 5;
|
|
+ }
|
|
}
|
|
}
|
|
else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
|
|
@@ -380,6 +430,13 @@ again:
|
|
|
|
if (first)
|
|
{
|
|
+ /* For the swapping logic below force vect_reduction_def
|
|
+ for the reduction op in a SLP reduction group. */
|
|
+ if (!STMT_VINFO_DATA_REF (stmt_info)
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
+ && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
|
|
+ && def_stmt_info)
|
|
+ dt = vect_reduction_def;
|
|
oprnd_info->first_dt = dt;
|
|
oprnd_info->first_op_type = TREE_TYPE (oprnd);
|
|
}
|
|
@@ -389,20 +446,35 @@ again:
|
|
the def-stmt/s of the first stmt. Allow different definition
|
|
types for reduction chains: the first stmt must be a
|
|
vect_reduction_def (a phi node), and the rest
|
|
- vect_internal_def. */
|
|
+ end in the reduction chain. */
|
|
tree type = TREE_TYPE (oprnd);
|
|
if ((oprnd_info->first_dt != dt
|
|
&& !(oprnd_info->first_dt == vect_reduction_def
|
|
- && dt == vect_internal_def)
|
|
+ && !STMT_VINFO_DATA_REF (stmt_info)
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
+ && def_stmt_info
|
|
+ && !STMT_VINFO_DATA_REF (def_stmt_info)
|
|
+ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
|
|
+ == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
|
|
&& !((oprnd_info->first_dt == vect_external_def
|
|
|| oprnd_info->first_dt == vect_constant_def)
|
|
&& (dt == vect_external_def
|
|
|| dt == vect_constant_def)))
|
|
- || !types_compatible_p (oprnd_info->first_op_type, type))
|
|
+ || !types_compatible_p (oprnd_info->first_op_type, type)
|
|
+ || (!STMT_VINFO_DATA_REF (stmt_info)
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
+ && ((!def_stmt_info
|
|
+ || STMT_VINFO_DATA_REF (def_stmt_info)
|
|
+ || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
|
|
+ != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
|
|
+ != (oprnd_info->first_dt != vect_reduction_def))))
|
|
{
|
|
/* Try swapping operands if we got a mismatch. */
|
|
if (i == commutative_op && !swapped)
|
|
{
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "trying swapped operands\n");
|
|
swapped = true;
|
|
goto again;
|
|
}
|
|
@@ -415,9 +487,9 @@ again:
|
|
}
|
|
if ((dt == vect_constant_def
|
|
|| dt == vect_external_def)
|
|
- && !current_vector_size.is_constant ()
|
|
+ && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
|
|
&& (TREE_CODE (type) == BOOLEAN_TYPE
|
|
- || !can_duplicate_and_interleave_p (stmts.length (),
|
|
+ || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
|
|
TYPE_MODE (type))))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -431,14 +503,37 @@ again:
|
|
/* Check the types of the definitions. */
|
|
switch (dt)
|
|
{
|
|
- case vect_constant_def:
|
|
case vect_external_def:
|
|
+ /* Make sure to demote the overall operand to external. */
|
|
+ oprnd_info->first_dt = vect_external_def;
|
|
+ /* Fallthru. */
|
|
+ case vect_constant_def:
|
|
+ oprnd_info->def_stmts.quick_push (NULL);
|
|
+ oprnd_info->ops.quick_push (oprnd);
|
|
break;
|
|
|
|
+ case vect_internal_def:
|
|
case vect_reduction_def:
|
|
+ if (oprnd_info->first_dt == vect_reduction_def
|
|
+ && !STMT_VINFO_DATA_REF (stmt_info)
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
+ && !STMT_VINFO_DATA_REF (def_stmt_info)
|
|
+ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
|
|
+ == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
|
|
+ {
|
|
+ /* For a SLP reduction chain we want to duplicate the
|
|
+ reduction to each of the chain members. That gets
|
|
+ us a sane SLP graph (still the stmts are not 100%
|
|
+ correct wrt the initial values). */
|
|
+ gcc_assert (!first);
|
|
+ oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
|
|
+ oprnd_info->ops.quick_push (oprnd_info->ops[0]);
|
|
+ break;
|
|
+ }
|
|
+ /* Fallthru. */
|
|
case vect_induction_def:
|
|
- case vect_internal_def:
|
|
oprnd_info->def_stmts.quick_push (def_stmt_info);
|
|
+ oprnd_info->ops.quick_push (oprnd);
|
|
break;
|
|
|
|
default:
|
|
@@ -468,6 +563,8 @@ again:
|
|
|
|
if (first_op_cond)
|
|
{
|
|
+ /* To get rid of this swapping we have to move the stmt code
|
|
+ to the SLP tree as well (and gather it here per stmt). */
|
|
gassign *stmt = as_a <gassign *> (stmt_info->stmt);
|
|
tree cond = gimple_assign_rhs1 (stmt);
|
|
enum tree_code code = TREE_CODE (cond);
|
|
@@ -492,10 +589,8 @@ again:
|
|
}
|
|
else
|
|
{
|
|
- unsigned int op = commutative_op + first_op_idx;
|
|
- swap_ssa_operands (stmt_info->stmt,
|
|
- gimple_op_ptr (stmt_info->stmt, op),
|
|
- gimple_op_ptr (stmt_info->stmt, op + 1));
|
|
+ /* Commutative ops need not reflect swapping, ops are in
|
|
+ the SLP tree. */
|
|
}
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
@@ -620,7 +715,7 @@ vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
|
|
is false then this indicates the comparison could not be
|
|
carried out or the stmts will never be vectorized by SLP.
|
|
|
|
- Note COND_EXPR is possibly ismorphic to another one after swapping its
|
|
+ Note COND_EXPR is possibly isomorphic to another one after swapping its
|
|
operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
|
|
the first stmt by swapping the two operands of comparison; set SWAP[i]
|
|
to 2 if stmt I is isormorphic to the first stmt by inverting the code
|
|
@@ -1030,7 +1125,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
vec<stmt_vec_info> stmts, unsigned int group_size,
|
|
poly_uint64 *max_nunits,
|
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
|
- unsigned max_tree_size,
|
|
scalar_stmts_to_slp_tree_map_t *bst_map);
|
|
|
|
static slp_tree
|
|
@@ -1038,7 +1132,6 @@ vect_build_slp_tree (vec_info *vinfo,
|
|
vec<stmt_vec_info> stmts, unsigned int group_size,
|
|
poly_uint64 *max_nunits,
|
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
|
- unsigned max_tree_size,
|
|
scalar_stmts_to_slp_tree_map_t *bst_map)
|
|
{
|
|
if (slp_tree *leader = bst_map->get (stmts))
|
|
@@ -1056,8 +1149,7 @@ vect_build_slp_tree (vec_info *vinfo,
|
|
poly_uint64 this_max_nunits = 1;
|
|
slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
|
|
&this_max_nunits,
|
|
- matches, npermutes, tree_size,
|
|
- max_tree_size, bst_map);
|
|
+ matches, npermutes, tree_size, bst_map);
|
|
if (res)
|
|
{
|
|
res->max_nunits = this_max_nunits;
|
|
@@ -1081,7 +1173,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
vec<stmt_vec_info> stmts, unsigned int group_size,
|
|
poly_uint64 *max_nunits,
|
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
|
- unsigned max_tree_size,
|
|
scalar_stmts_to_slp_tree_map_t *bst_map)
|
|
{
|
|
unsigned nops, i, this_tree_size = 0;
|
|
@@ -1109,7 +1200,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
|
|
{
|
|
tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
|
|
- tree vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
|
|
return NULL;
|
|
|
|
@@ -1129,18 +1220,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
/* Else def types have to match. */
|
|
stmt_vec_info other_info;
|
|
FOR_EACH_VEC_ELT (stmts, i, other_info)
|
|
- {
|
|
- /* But for reduction chains only check on the first stmt. */
|
|
- if (!STMT_VINFO_DATA_REF (other_info)
|
|
- && REDUC_GROUP_FIRST_ELEMENT (other_info)
|
|
- && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
|
|
- continue;
|
|
- if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
|
|
- return NULL;
|
|
- }
|
|
+ if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
|
|
+ return NULL;
|
|
}
|
|
else
|
|
return NULL;
|
|
+ (*tree_size)++;
|
|
node = vect_create_new_slp_node (stmts);
|
|
return node;
|
|
}
|
|
@@ -1152,13 +1237,23 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
&this_max_nunits, matches, &two_operators))
|
|
return NULL;
|
|
|
|
- /* If the SLP node is a load, terminate the recursion. */
|
|
+ /* If the SLP node is a load, terminate the recursion unless masked. */
|
|
if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
|
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
|
|
{
|
|
- *max_nunits = this_max_nunits;
|
|
- node = vect_create_new_slp_node (stmts);
|
|
- return node;
|
|
+ if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
|
|
+ {
|
|
+ /* Masked load. */
|
|
+ gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
|
|
+ nops = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ *max_nunits = this_max_nunits;
|
|
+ (*tree_size)++;
|
|
+ node = vect_create_new_slp_node (stmts);
|
|
+ return node;
|
|
+ }
|
|
}
|
|
|
|
/* Get at the operands, verifying they are compatible. */
|
|
@@ -1184,9 +1279,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
|
|
stmt_info = stmts[0];
|
|
|
|
- if (tree_size)
|
|
- max_tree_size -= *tree_size;
|
|
-
|
|
/* Create SLP_TREE nodes for the definition node/s. */
|
|
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
|
|
{
|
|
@@ -1194,32 +1286,34 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
unsigned old_tree_size = this_tree_size;
|
|
unsigned int j;
|
|
|
|
+ if (oprnd_info->first_dt == vect_uninitialized_def)
|
|
+ {
|
|
+ /* COND_EXPR have one too many eventually if the condition
|
|
+ is a SSA name. */
|
|
+ gcc_assert (i == 3 && nops == 4);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
if (oprnd_info->first_dt != vect_internal_def
|
|
&& oprnd_info->first_dt != vect_reduction_def
|
|
&& oprnd_info->first_dt != vect_induction_def)
|
|
- continue;
|
|
-
|
|
- if (++this_tree_size > max_tree_size)
|
|
{
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
|
- vect_location,
|
|
- "Build SLP failed: SLP tree too large\n");
|
|
- FOR_EACH_VEC_ELT (children, j, child)
|
|
- vect_free_slp_tree (child, false);
|
|
- vect_free_oprnd_info (oprnds_info);
|
|
- return NULL;
|
|
+ slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
|
|
+ SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
|
|
+ oprnd_info->ops = vNULL;
|
|
+ children.safe_push (invnode);
|
|
+ continue;
|
|
}
|
|
|
|
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
|
|
group_size, &this_max_nunits,
|
|
matches, npermutes,
|
|
- &this_tree_size,
|
|
- max_tree_size, bst_map)) != NULL)
|
|
+ &this_tree_size, bst_map)) != NULL)
|
|
{
|
|
/* If we have all children of child built up from scalars then just
|
|
throw that away and build it up this node from scalars. */
|
|
- if (!SLP_TREE_CHILDREN (child).is_empty ()
|
|
+ if (is_a <bb_vec_info> (vinfo)
|
|
+ && !SLP_TREE_CHILDREN (child).is_empty ()
|
|
/* ??? Rejecting patterns this way doesn't work. We'd have to
|
|
do extra work to cancel the pattern so the uses see the
|
|
scalar version. */
|
|
@@ -1244,6 +1338,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
"scalars instead\n");
|
|
oprnd_info->def_stmts = vNULL;
|
|
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
|
+ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
|
+ oprnd_info->ops = vNULL;
|
|
+ ++this_tree_size;
|
|
children.safe_push (child);
|
|
continue;
|
|
}
|
|
@@ -1273,9 +1370,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Building vector operands from scalars\n");
|
|
+ this_tree_size++;
|
|
child = vect_create_new_slp_node (oprnd_info->def_stmts);
|
|
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
|
+ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
|
children.safe_push (child);
|
|
+ oprnd_info->ops = vNULL;
|
|
oprnd_info->def_stmts = vNULL;
|
|
continue;
|
|
}
|
|
@@ -1355,6 +1455,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
{
|
|
std::swap (oprnds_info[0]->def_stmts[j],
|
|
oprnds_info[1]->def_stmts[j]);
|
|
+ std::swap (oprnds_info[0]->ops[j],
|
|
+ oprnds_info[1]->ops[j]);
|
|
if (dump_enabled_p ())
|
|
dump_printf (MSG_NOTE, "%d ", j);
|
|
}
|
|
@@ -1365,37 +1467,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
|
|
group_size, &this_max_nunits,
|
|
tem, npermutes,
|
|
- &this_tree_size,
|
|
- max_tree_size, bst_map)) != NULL)
|
|
+ &this_tree_size, bst_map)) != NULL)
|
|
{
|
|
- /* ... so if successful we can apply the operand swapping
|
|
- to the GIMPLE IL. This is necessary because for example
|
|
- vect_get_slp_defs uses operand indexes and thus expects
|
|
- canonical operand order. This is also necessary even
|
|
- if we end up building the operand from scalars as
|
|
- we'll continue to process swapped operand two. */
|
|
- for (j = 0; j < group_size; ++j)
|
|
- gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
|
|
- for (j = 0; j < group_size; ++j)
|
|
- if (matches[j] == !swap_not_matching)
|
|
- {
|
|
- gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
|
|
- /* Avoid swapping operands twice. */
|
|
- if (gimple_plf (stmt, GF_PLF_1))
|
|
- continue;
|
|
- swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
|
|
- gimple_assign_rhs2_ptr (stmt));
|
|
- gimple_set_plf (stmt, GF_PLF_1, true);
|
|
- }
|
|
- /* Verify we swap all duplicates or none. */
|
|
- if (flag_checking)
|
|
- for (j = 0; j < group_size; ++j)
|
|
- gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
|
|
- == (matches[j] == !swap_not_matching));
|
|
-
|
|
/* If we have all children of child built up from scalars then
|
|
just throw that away and build it up this node from scalars. */
|
|
- if (!SLP_TREE_CHILDREN (child).is_empty ()
|
|
+ if (is_a <bb_vec_info> (vinfo)
|
|
+ && !SLP_TREE_CHILDREN (child).is_empty ()
|
|
/* ??? Rejecting patterns this way doesn't work. We'd have
|
|
to do extra work to cancel the pattern so the uses see the
|
|
scalar version. */
|
|
@@ -1421,6 +1498,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|
"scalars instead\n");
|
|
oprnd_info->def_stmts = vNULL;
|
|
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
|
+ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
|
+ oprnd_info->ops = vNULL;
|
|
+ ++this_tree_size;
|
|
children.safe_push (child);
|
|
continue;
|
|
}
|
|
@@ -1444,8 +1524,7 @@ fail:
|
|
|
|
vect_free_oprnd_info (oprnds_info);
|
|
|
|
- if (tree_size)
|
|
- *tree_size += this_tree_size;
|
|
+ *tree_size += this_tree_size + 1;
|
|
*max_nunits = this_max_nunits;
|
|
|
|
node = vect_create_new_slp_node (stmts);
|
|
@@ -1460,9 +1539,10 @@ static void
|
|
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
|
|
slp_tree node, hash_set<slp_tree> &visited)
|
|
{
|
|
- int i;
|
|
+ unsigned i;
|
|
stmt_vec_info stmt_info;
|
|
slp_tree child;
|
|
+ tree op;
|
|
|
|
if (visited.add (node))
|
|
return;
|
|
@@ -1470,11 +1550,23 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
|
|
dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
|
|
dump_user_location_t user_loc = loc.get_user_location ();
|
|
dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
|
|
- SLP_TREE_DEF_TYPE (node) != vect_internal_def
|
|
- ? " (external)" : "", node,
|
|
+ SLP_TREE_DEF_TYPE (node) == vect_external_def
|
|
+ ? " (external)"
|
|
+ : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
|
|
+ ? " (constant)"
|
|
+ : ""), node,
|
|
estimated_poly_value (node->max_nunits));
|
|
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
|
- dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
|
|
+ if (SLP_TREE_SCALAR_STMTS (node).exists ())
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
|
+ dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
|
|
+ else
|
|
+ {
|
|
+ dump_printf_loc (metadata, user_loc, "\t{ ");
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
|
|
+ dump_printf (metadata, "%T%s ", op,
|
|
+ i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
|
|
+ dump_printf (metadata, "}\n");
|
|
+ }
|
|
if (SLP_TREE_CHILDREN (node).is_empty ())
|
|
return;
|
|
dump_printf_loc (metadata, user_loc, "\tchildren");
|
|
@@ -1563,8 +1655,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
|
vec<unsigned> permutation,
|
|
hash_set<slp_tree> &visited)
|
|
{
|
|
- stmt_vec_info stmt_info;
|
|
- vec<stmt_vec_info> tmp_stmts;
|
|
unsigned int i;
|
|
slp_tree child;
|
|
|
|
@@ -1574,15 +1664,30 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
|
vect_slp_rearrange_stmts (child, group_size, permutation, visited);
|
|
|
|
- gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
|
- tmp_stmts.create (group_size);
|
|
- tmp_stmts.quick_grow_cleared (group_size);
|
|
-
|
|
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
|
- tmp_stmts[permutation[i]] = stmt_info;
|
|
-
|
|
- SLP_TREE_SCALAR_STMTS (node).release ();
|
|
- SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
|
|
+ if (SLP_TREE_SCALAR_STMTS (node).exists ())
|
|
+ {
|
|
+ gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
|
+ vec<stmt_vec_info> tmp_stmts;
|
|
+ tmp_stmts.create (group_size);
|
|
+ tmp_stmts.quick_grow (group_size);
|
|
+ stmt_vec_info stmt_info;
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
|
+ tmp_stmts[permutation[i]] = stmt_info;
|
|
+ SLP_TREE_SCALAR_STMTS (node).release ();
|
|
+ SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
|
|
+ }
|
|
+ if (SLP_TREE_SCALAR_OPS (node).exists ())
|
|
+ {
|
|
+ gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
|
|
+ vec<tree> tmp_ops;
|
|
+ tmp_ops.create (group_size);
|
|
+ tmp_ops.quick_grow (group_size);
|
|
+ tree op;
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
|
|
+ tmp_ops[permutation[i]] = op;
|
|
+ SLP_TREE_SCALAR_OPS (node).release ();
|
|
+ SLP_TREE_SCALAR_OPS (node) = tmp_ops;
|
|
+ }
|
|
}
|
|
|
|
|
|
@@ -1668,9 +1773,10 @@ vect_gather_slp_loads (slp_instance inst, slp_tree node,
|
|
|
|
if (SLP_TREE_CHILDREN (node).length () == 0)
|
|
{
|
|
+ if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
|
+ return;
|
|
stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
|
|
- if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
|
|
- && STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
|
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
|
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
|
|
SLP_INSTANCE_LOADS (inst).safe_push (node);
|
|
}
|
|
@@ -1913,7 +2019,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
|
{
|
|
scalar_type = TREE_TYPE (DR_REF (dr));
|
|
- vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
group_size = DR_GROUP_SIZE (stmt_info);
|
|
}
|
|
else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
@@ -1964,7 +2070,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|
/* Mark the first element of the reduction chain as reduction to properly
|
|
transform the node. In the reduction analysis phase only the last
|
|
element of the chain is marked as reduction. */
|
|
- STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
|
|
+ STMT_VINFO_DEF_TYPE (stmt_info)
|
|
+ = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
|
|
STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
|
|
= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
|
|
}
|
|
@@ -1982,9 +2089,10 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|
scalar_stmts_to_slp_tree_map_t *bst_map
|
|
= new scalar_stmts_to_slp_tree_map_t ();
|
|
poly_uint64 max_nunits = nunits;
|
|
+ unsigned tree_size = 0;
|
|
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
|
|
&max_nunits, matches, &npermutes,
|
|
- NULL, max_tree_size, bst_map);
|
|
+ &tree_size, bst_map);
|
|
/* The map keeps a reference on SLP nodes built, release that. */
|
|
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
|
|
it != bst_map->end (); ++it)
|
|
@@ -1993,6 +2101,34 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|
delete bst_map;
|
|
if (node != NULL)
|
|
{
|
|
+ /* If this is a reduction chain with a conversion in front
|
|
+ amend the SLP tree with a node for that. */
|
|
+ if (!dr
|
|
+ && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
|
|
+ {
|
|
+ /* Get at the conversion stmt - we know it's the single use
|
|
+ of the last stmt of the reduction chain. */
|
|
+ gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
|
|
+ use_operand_p use_p;
|
|
+ gimple *use_stmt;
|
|
+ bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
|
|
+ gcc_assert (r);
|
|
+ next_info = vinfo->lookup_stmt (use_stmt);
|
|
+ next_info = vect_stmt_to_vectorize (next_info);
|
|
+ scalar_stmts = vNULL;
|
|
+ scalar_stmts.create (group_size);
|
|
+ for (unsigned i = 0; i < group_size; ++i)
|
|
+ scalar_stmts.quick_push (next_info);
|
|
+ slp_tree conv = vect_create_new_slp_node (scalar_stmts);
|
|
+ SLP_TREE_CHILDREN (conv).quick_push (node);
|
|
+ node = conv;
|
|
+ /* We also have to fake this conversion stmt as SLP reduction group
|
|
+ so we don't have to mess with too much code elsewhere. */
|
|
+ REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
|
|
+ REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
|
|
+ }
|
|
+
|
|
/* Calculate the unrolling factor based on the smallest type. */
|
|
poly_uint64 unrolling_factor
|
|
= calculate_unrolling_factor (max_nunits, group_size);
|
|
@@ -2025,6 +2161,10 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
|
SLP_INSTANCE_LOADS (new_instance) = vNULL;
|
|
vect_gather_slp_loads (new_instance, node);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "SLP size %u vs. limit %u.\n",
|
|
+ tree_size, max_tree_size);
|
|
|
|
/* Compute the load permutation. */
|
|
slp_tree load_node;
|
|
@@ -2231,8 +2371,11 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
|
|
FOR_EACH_VEC_ELT (slp_instances, i, instance)
|
|
{
|
|
/* FORNOW: SLP if you can. */
|
|
- /* All unroll factors have the form current_vector_size * X for some
|
|
- rational X, so they must have a common multiple. */
|
|
+ /* All unroll factors have the form:
|
|
+
|
|
+ GET_MODE_SIZE (vinfo->vector_mode) * X
|
|
+
|
|
+ for some rational X, so they must have a common multiple. */
|
|
unrolling_factor
|
|
= force_common_multiple (unrolling_factor,
|
|
SLP_INSTANCE_UNROLLING_FACTOR (instance));
|
|
@@ -2327,7 +2470,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
|
|
|
|
if (!only_edge)
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
|
- if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
|
|
+ if (SLP_TREE_DEF_TYPE (child) != vect_external_def
|
|
+ && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
|
|
vect_detect_hybrid_slp_stmts (child, i, stype, visited);
|
|
}
|
|
|
|
@@ -2514,8 +2658,15 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
|
|
VF divided by the number of elements in a vector. */
|
|
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
|
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
|
- SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
|
- = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
|
|
+ {
|
|
+ for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
|
|
+ if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
|
|
+ {
|
|
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
|
+ = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
else
|
|
{
|
|
poly_uint64 vf;
|
|
@@ -2533,6 +2684,39 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
|
|
return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
|
|
}
|
|
|
|
+/* Try to build NODE from scalars, returning true on success.
|
|
+ NODE_INSTANCE is the SLP instance that contains NODE. */
|
|
+
|
|
+static bool
|
|
+vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
|
|
+ slp_instance node_instance)
|
|
+{
|
|
+ stmt_vec_info stmt_info;
|
|
+ unsigned int i;
|
|
+
|
|
+ if (!is_a <bb_vec_info> (vinfo)
|
|
+ || node == SLP_INSTANCE_TREE (node_instance)
|
|
+ || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
|
|
+ return false;
|
|
+
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "Building vector operands from scalars instead\n");
|
|
+
|
|
+ /* Don't remove and free the child nodes here, since they could be
|
|
+ referenced by other structures. The analysis and scheduling phases
|
|
+ (need to) ignore child nodes of anything that isn't vect_internal_def. */
|
|
+ unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
|
+ SLP_TREE_DEF_TYPE (node) = vect_external_def;
|
|
+ SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
|
|
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
|
+ {
|
|
+ tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
|
|
+ SLP_TREE_SCALAR_OPS (node)[i] = lhs;
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
/* Analyze statements contained in SLP tree NODE after recursively analyzing
|
|
the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
|
|
|
|
@@ -2559,6 +2743,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
|
{
|
|
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
|
= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
|
|
+ /* Cope with cases in which we made a late decision to build the
|
|
+ node from scalars. */
|
|
+ if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def
|
|
+ && vect_slp_convert_to_external (vinfo, node, node_instance))
|
|
+ ;
|
|
+ else
|
|
+ gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader));
|
|
return true;
|
|
}
|
|
|
|
@@ -2579,25 +2770,31 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
|
auto_vec<vect_def_type, 4> dt;
|
|
dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
|
- dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
|
|
+ if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
|
+ dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
|
|
|
|
/* Push SLP node def-type to stmt operands. */
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
|
- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
|
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
|
|
+ && SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
|
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
|
= SLP_TREE_DEF_TYPE (child);
|
|
|
|
/* Check everything worked out. */
|
|
bool res = true;
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
|
- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
|
- {
|
|
- if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
|
- != SLP_TREE_DEF_TYPE (child))
|
|
- res = false;
|
|
- }
|
|
- else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
|
|
- res = false;
|
|
+ if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
|
+ {
|
|
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
|
+ {
|
|
+ if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
|
+ != SLP_TREE_DEF_TYPE (child))
|
|
+ res = false;
|
|
+ }
|
|
+ else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
|
+ != dt[j])
|
|
+ res = false;
|
|
+ }
|
|
if (!res && dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: same operand with different "
|
|
@@ -2609,7 +2806,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
|
|
|
/* Restore def-types. */
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
|
- STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
|
|
+ if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
|
+ STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
|
|
+
|
|
+ /* If this node can't be vectorized, try pruning the tree here rather
|
|
+ than felling the whole thing. */
|
|
+ if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
|
|
+ res = true;
|
|
|
|
return res;
|
|
}
|
|
@@ -2818,19 +3021,17 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
|
return true;
|
|
}
|
|
|
|
-/* Check if the basic block can be vectorized. Returns a bb_vec_info
|
|
- if so and sets fatal to true if failure is independent of
|
|
- current_vector_size. */
|
|
+/* Check if the region described by BB_VINFO can be vectorized, returning
|
|
+ true if so. When returning false, set FATAL to true if the same failure
|
|
+ would prevent vectorization at other vector sizes, false if it is still
|
|
+ worth trying other sizes. N_STMTS is the number of statements in the
|
|
+ region. */
|
|
|
|
-static bb_vec_info
|
|
-vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
- gimple_stmt_iterator region_end,
|
|
- vec<data_reference_p> datarefs, int n_stmts,
|
|
- bool &fatal, vec_info_shared *shared)
|
|
+static bool
|
|
+vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
|
|
{
|
|
DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
|
|
|
|
- bb_vec_info bb_vinfo;
|
|
slp_instance instance;
|
|
int i;
|
|
poly_uint64 min_vf = 2;
|
|
@@ -2838,34 +3039,15 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
/* The first group of checks is independent of the vector size. */
|
|
fatal = true;
|
|
|
|
- if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "not vectorized: too many instructions in "
|
|
- "basic block.\n");
|
|
- free_data_refs (datarefs);
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
|
|
- if (!bb_vinfo)
|
|
- return NULL;
|
|
-
|
|
- BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
|
|
- bb_vinfo->shared->save_datarefs ();
|
|
-
|
|
/* Analyze the data references. */
|
|
|
|
- if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
|
|
+ if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: unhandled data-ref in basic "
|
|
"block.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
|
|
@@ -2874,9 +3056,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: not enough data-refs in "
|
|
"basic block.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
if (!vect_analyze_data_ref_accesses (bb_vinfo))
|
|
@@ -2885,9 +3065,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: unhandled data access in "
|
|
"basic block.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
/* If there are no grouped stores in the region there is no need
|
|
@@ -2899,9 +3077,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: no grouped stores in "
|
|
"basic block.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
/* While the rest of the analysis below depends on it in some way. */
|
|
@@ -2921,9 +3097,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
"not vectorized: failed to find SLP opportunities "
|
|
"in basic block.\n");
|
|
}
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
vect_record_base_alignments (bb_vinfo);
|
|
@@ -2954,19 +3128,14 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
i++;
|
|
}
|
|
if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
|
|
- {
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
- }
|
|
+ return false;
|
|
|
|
if (!vect_slp_analyze_operations (bb_vinfo))
|
|
{
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: bad operation in basic block.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
/* Cost model: check if the vectorization is worthwhile. */
|
|
@@ -2977,80 +3146,61 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
|
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
"not vectorized: vectorization is not "
|
|
"profitable.\n");
|
|
-
|
|
- delete bb_vinfo;
|
|
- return NULL;
|
|
+ return false;
|
|
}
|
|
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"Basic block will be vectorized using SLP\n");
|
|
-
|
|
- return bb_vinfo;
|
|
+ return true;
|
|
}
|
|
|
|
+/* Subroutine of vect_slp_bb. Try to vectorize the statements between
|
|
+ REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
|
|
+ on success. The region has N_STMTS statements and has the datarefs
|
|
+ given by DATAREFS. */
|
|
|
|
-/* Main entry for the BB vectorizer. Analyze and transform BB, returns
|
|
- true if anything in the basic-block was vectorized. */
|
|
-
|
|
-bool
|
|
-vect_slp_bb (basic_block bb)
|
|
+static bool
|
|
+vect_slp_bb_region (gimple_stmt_iterator region_begin,
|
|
+ gimple_stmt_iterator region_end,
|
|
+ vec<data_reference_p> datarefs,
|
|
+ unsigned int n_stmts)
|
|
{
|
|
bb_vec_info bb_vinfo;
|
|
- gimple_stmt_iterator gsi;
|
|
- bool any_vectorized = false;
|
|
- auto_vector_sizes vector_sizes;
|
|
+ auto_vector_modes vector_modes;
|
|
|
|
/* Autodetect first vector size we try. */
|
|
- current_vector_size = 0;
|
|
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
|
|
- unsigned int next_size = 0;
|
|
+ machine_mode next_vector_mode = VOIDmode;
|
|
+ targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
|
|
+ unsigned int mode_i = 0;
|
|
|
|
- gsi = gsi_start_bb (bb);
|
|
+ vec_info_shared shared;
|
|
|
|
- poly_uint64 autodetected_vector_size = 0;
|
|
+ machine_mode autodetected_vector_mode = VOIDmode;
|
|
while (1)
|
|
{
|
|
- if (gsi_end_p (gsi))
|
|
- break;
|
|
-
|
|
- gimple_stmt_iterator region_begin = gsi;
|
|
- vec<data_reference_p> datarefs = vNULL;
|
|
- int insns = 0;
|
|
-
|
|
- for (; !gsi_end_p (gsi); gsi_next (&gsi))
|
|
- {
|
|
- gimple *stmt = gsi_stmt (gsi);
|
|
- if (is_gimple_debug (stmt))
|
|
- continue;
|
|
- insns++;
|
|
-
|
|
- if (gimple_location (stmt) != UNKNOWN_LOCATION)
|
|
- vect_location = stmt;
|
|
-
|
|
- if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
|
|
- break;
|
|
- }
|
|
-
|
|
- /* Skip leading unhandled stmts. */
|
|
- if (gsi_stmt (region_begin) == gsi_stmt (gsi))
|
|
- {
|
|
- gsi_next (&gsi);
|
|
- continue;
|
|
- }
|
|
-
|
|
- gimple_stmt_iterator region_end = gsi;
|
|
-
|
|
bool vectorized = false;
|
|
bool fatal = false;
|
|
- vec_info_shared shared;
|
|
- bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
|
|
- datarefs, insns, fatal, &shared);
|
|
- if (bb_vinfo
|
|
+ bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
|
|
+
|
|
+ bool first_time_p = shared.datarefs.is_empty ();
|
|
+ BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
|
|
+ if (first_time_p)
|
|
+ bb_vinfo->shared->save_datarefs ();
|
|
+ else
|
|
+ bb_vinfo->shared->check_datarefs ();
|
|
+ bb_vinfo->vector_mode = next_vector_mode;
|
|
+
|
|
+ if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
|
|
&& dbg_cnt (vect_slp))
|
|
{
|
|
if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
|
|
+ {
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Analysis succeeded with vector mode"
|
|
+ " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
|
|
+ dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
|
|
+ }
|
|
|
|
bb_vinfo->shared->check_datarefs ();
|
|
vect_schedule_slp (bb_vinfo);
|
|
@@ -3058,7 +3208,7 @@ vect_slp_bb (basic_block bb)
|
|
unsigned HOST_WIDE_INT bytes;
|
|
if (dump_enabled_p ())
|
|
{
|
|
- if (current_vector_size.is_constant (&bytes))
|
|
+ if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
|
|
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
|
|
"basic block part vectorized using %wu byte "
|
|
"vectors\n", bytes);
|
|
@@ -3070,50 +3220,120 @@ vect_slp_bb (basic_block bb)
|
|
|
|
vectorized = true;
|
|
}
|
|
- delete bb_vinfo;
|
|
+ else
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Analysis failed with vector mode %s\n",
|
|
+ GET_MODE_NAME (bb_vinfo->vector_mode));
|
|
+ }
|
|
|
|
- any_vectorized |= vectorized;
|
|
+ if (mode_i == 0)
|
|
+ autodetected_vector_mode = bb_vinfo->vector_mode;
|
|
|
|
- if (next_size == 0)
|
|
- autodetected_vector_size = current_vector_size;
|
|
+ if (!fatal)
|
|
+ while (mode_i < vector_modes.length ()
|
|
+ && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** The result for vector mode %s would"
|
|
+ " be the same\n",
|
|
+ GET_MODE_NAME (vector_modes[mode_i]));
|
|
+ mode_i += 1;
|
|
+ }
|
|
|
|
- if (next_size < vector_sizes.length ()
|
|
- && known_eq (vector_sizes[next_size], autodetected_vector_size))
|
|
- next_size += 1;
|
|
+ delete bb_vinfo;
|
|
+
|
|
+ if (mode_i < vector_modes.length ()
|
|
+ && VECTOR_MODE_P (autodetected_vector_mode)
|
|
+ && (related_vector_mode (vector_modes[mode_i],
|
|
+ GET_MODE_INNER (autodetected_vector_mode))
|
|
+ == autodetected_vector_mode)
|
|
+ && (related_vector_mode (autodetected_vector_mode,
|
|
+ GET_MODE_INNER (vector_modes[mode_i]))
|
|
+ == vector_modes[mode_i]))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Skipping vector mode %s, which would"
|
|
+ " repeat the analysis for %s\n",
|
|
+ GET_MODE_NAME (vector_modes[mode_i]),
|
|
+ GET_MODE_NAME (autodetected_vector_mode));
|
|
+ mode_i += 1;
|
|
+ }
|
|
|
|
if (vectorized
|
|
- || next_size == vector_sizes.length ()
|
|
- || known_eq (current_vector_size, 0U)
|
|
+ || mode_i == vector_modes.length ()
|
|
+ || autodetected_vector_mode == VOIDmode
|
|
/* If vect_slp_analyze_bb_1 signaled that analysis for all
|
|
vector sizes will fail do not bother iterating. */
|
|
|| fatal)
|
|
+ return vectorized;
|
|
+
|
|
+ /* Try the next biggest vector size. */
|
|
+ next_vector_mode = vector_modes[mode_i++];
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "***** Re-trying analysis with vector mode %s\n",
|
|
+ GET_MODE_NAME (next_vector_mode));
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Main entry for the BB vectorizer. Analyze and transform BB, returns
|
|
+ true if anything in the basic-block was vectorized. */
|
|
+
|
|
+bool
|
|
+vect_slp_bb (basic_block bb)
|
|
+{
|
|
+ gimple_stmt_iterator gsi;
|
|
+ bool any_vectorized = false;
|
|
+
|
|
+ gsi = gsi_start_bb (bb);
|
|
+ while (!gsi_end_p (gsi))
|
|
+ {
|
|
+ gimple_stmt_iterator region_begin = gsi;
|
|
+ vec<data_reference_p> datarefs = vNULL;
|
|
+ int insns = 0;
|
|
+
|
|
+ for (; !gsi_end_p (gsi); gsi_next (&gsi))
|
|
{
|
|
- if (gsi_end_p (region_end))
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
|
+ if (is_gimple_debug (stmt))
|
|
+ continue;
|
|
+ insns++;
|
|
+
|
|
+ if (gimple_location (stmt) != UNKNOWN_LOCATION)
|
|
+ vect_location = stmt;
|
|
+
|
|
+ if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
|
|
break;
|
|
+ }
|
|
|
|
- /* Skip the unhandled stmt. */
|
|
+ /* Skip leading unhandled stmts. */
|
|
+ if (gsi_stmt (region_begin) == gsi_stmt (gsi))
|
|
+ {
|
|
gsi_next (&gsi);
|
|
-
|
|
- /* And reset vector sizes. */
|
|
- current_vector_size = 0;
|
|
- next_size = 0;
|
|
+ continue;
|
|
}
|
|
- else
|
|
+
|
|
+ gimple_stmt_iterator region_end = gsi;
|
|
+
|
|
+ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
|
|
{
|
|
- /* Try the next biggest vector size. */
|
|
- current_vector_size = vector_sizes[next_size++];
|
|
if (dump_enabled_p ())
|
|
- {
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "***** Re-trying analysis with "
|
|
- "vector size ");
|
|
- dump_dec (MSG_NOTE, current_vector_size);
|
|
- dump_printf (MSG_NOTE, "\n");
|
|
- }
|
|
-
|
|
- /* Start over. */
|
|
- gsi = region_begin;
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "not vectorized: too many instructions in "
|
|
+ "basic block.\n");
|
|
}
|
|
+ else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
|
|
+ any_vectorized = true;
|
|
+
|
|
+ if (gsi_end_p (region_end))
|
|
+ break;
|
|
+
|
|
+ /* Skip the unhandled stmt. */
|
|
+ gsi_next (&gsi);
|
|
}
|
|
|
|
return any_vectorized;
@@ -3184,8 +3404,9 @@ vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
to cut down on the number of interleaves. */
|
|
|
-duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
|
|
- unsigned int nresults, vec<tree> &results)
|
|
+duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
|
|
+ vec<tree> elts, unsigned int nresults,
|
|
+ vec<tree> &results)
|
|
{
|
|
unsigned int nelts = elts.length ();
|
|
tree element_type = TREE_TYPE (vector_type);
|
|
@@ -3194,7 +3415,7 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
|
|
unsigned int nvectors = 1;
|
|
tree new_vector_type;
|
|
tree permutes[2];
|
|
- if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
|
|
+ if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
|
|
&nvectors, &new_vector_type,
|
|
permutes))
|
|
gcc_unreachable ();
|
|
@@ -3276,52 +3497,45 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
|
|
|
|
/* For constant and loop invariant defs of SLP_NODE this function returns
|
|
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
|
|
- OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
|
|
- scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
|
|
- REDUC_INDEX is the index of the reduction operand in the statements, unless
|
|
- it is -1. */
|
|
+ OP_NODE determines the node for the operand containing the scalar
|
|
+ operands. */
|
|
|
|
static void
|
|
-vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|
- vec<tree> *vec_oprnds,
|
|
- unsigned int op_num, unsigned int number_of_vectors)
|
|
+vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
|
|
+ vec<tree> *vec_oprnds)
|
|
{
|
|
- vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
|
|
- stmt_vec_info stmt_vinfo = stmts[0];
|
|
- gimple *stmt = stmt_vinfo->stmt;
|
|
+ stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
|
|
+ vec_info *vinfo = stmt_vinfo->vinfo;
|
|
unsigned HOST_WIDE_INT nunits;
|
|
tree vec_cst;
|
|
unsigned j, number_of_places_left_in_vector;
|
|
tree vector_type;
|
|
tree vop;
|
|
- int group_size = stmts.length ();
|
|
+ int group_size = op_node->ops.length ();
|
|
unsigned int vec_num, i;
|
|
unsigned number_of_copies = 1;
|
|
- vec<tree> voprnds;
|
|
- voprnds.create (number_of_vectors);
|
|
- bool constant_p, is_store;
|
|
+ bool constant_p;
|
|
tree neutral_op = NULL;
|
|
- enum tree_code code = gimple_expr_code (stmt);
|
|
gimple_seq ctor_seq = NULL;
|
|
auto_vec<tree, 16> permute_results;
|
|
|
|
+ /* ??? SLP analysis should compute the vector type for the
|
|
+ constant / invariant and store it in the SLP node. */
|
|
+ tree op = op_node->ops[0];
|
|
/* Check if vector type is a boolean vector. */
|
|
+ tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
|
|
if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
|
|
&& vect_mask_constant_operand_p (stmt_vinfo))
|
|
- vector_type
|
|
- = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
|
|
- else
|
|
- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
|
-
|
|
- if (STMT_VINFO_DATA_REF (stmt_vinfo))
|
|
- {
|
|
- is_store = true;
|
|
- op = gimple_assign_rhs1 (stmt);
|
|
- }
|
|
+ vector_type = truth_type_for (stmt_vectype);
|
|
else
|
|
- is_store = false;
|
|
+ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
|
|
|
|
- gcc_assert (op);
|
|
+ unsigned int number_of_vectors
|
|
+ = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
|
|
+ * TYPE_VECTOR_SUBPARTS (stmt_vectype),
|
|
+ vector_type);
|
|
+ vec_oprnds->create (number_of_vectors);
|
|
+ auto_vec<tree> voprnds (number_of_vectors);
|
|
|
|
/* NUMBER_OF_COPIES is the number of times we need to use the same values in
|
|
created vectors. It is greater than 1 if unrolling is performed.
|
|
@@ -3353,56 +3567,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|
bool place_after_defs = false;
|
|
for (j = 0; j < number_of_copies; j++)
|
|
{
|
|
- for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
|
|
+ for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
|
|
{
|
|
- stmt = stmt_vinfo->stmt;
|
|
- if (is_store)
|
|
- op = gimple_assign_rhs1 (stmt);
|
|
- else
|
|
- {
|
|
- switch (code)
|
|
- {
|
|
- case COND_EXPR:
|
|
- {
|
|
- tree cond = gimple_assign_rhs1 (stmt);
|
|
- if (TREE_CODE (cond) == SSA_NAME)
|
|
- op = gimple_op (stmt, op_num + 1);
|
|
- else if (op_num == 0 || op_num == 1)
|
|
- op = TREE_OPERAND (cond, op_num);
|
|
- else
|
|
- {
|
|
- if (op_num == 2)
|
|
- op = gimple_assign_rhs2 (stmt);
|
|
- else
|
|
- op = gimple_assign_rhs3 (stmt);
|
|
- }
|
|
- }
|
|
- break;
|
|
-
|
|
- case CALL_EXPR:
|
|
- op = gimple_call_arg (stmt, op_num);
|
|
- break;
|
|
-
|
|
- case LSHIFT_EXPR:
|
|
- case RSHIFT_EXPR:
|
|
- case LROTATE_EXPR:
|
|
- case RROTATE_EXPR:
|
|
- op = gimple_op (stmt, op_num + 1);
|
|
- /* Unlike the other binary operators, shifts/rotates have
|
|
- the shift count being int, instead of the same type as
|
|
- the lhs, so make sure the scalar is the right type if
|
|
- we are dealing with vectors of
|
|
- long long/long/short/char. */
|
|
- if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
|
|
- op = fold_convert (TREE_TYPE (vector_type), op);
|
|
- break;
|
|
-
|
|
- default:
|
|
- op = gimple_op (stmt, op_num + 1);
|
|
- break;
|
|
- }
|
|
- }
|
|
-
|
|
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
|
number_of_places_left_in_vector--;
|
|
tree orig_op = op;
|
|
@@ -3472,9 +3638,9 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|
vec_cst = gimple_build_vector (&ctor_seq, &elts);
|
|
else
|
|
{
|
|
- if (vec_oprnds->is_empty ())
|
|
- duplicate_and_interleave (&ctor_seq, vector_type, elts,
|
|
- number_of_vectors,
|
|
+ if (permute_results.is_empty ())
|
|
+ duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
|
|
+ elts, number_of_vectors,
|
|
permute_results);
|
|
vec_cst = permute_results[number_of_vectors - j - 1];
|
|
}
|
|
@@ -3516,8 +3682,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|
vec_oprnds->quick_push (vop);
|
|
}
|
|
|
|
- voprnds.release ();
|
|
-
|
|
/* In case that VF is greater than the unrolling factor needed for the SLP
|
|
group of stmts, NUMBER_OF_VECTORS to be created is greater than
|
|
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
|
|
@@ -3548,25 +3712,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|
static void
|
|
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
|
|
{
|
|
- tree vec_oprnd;
|
|
stmt_vec_info vec_def_stmt_info;
|
|
unsigned int i;
|
|
|
|
gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
|
|
|
|
FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
|
|
- {
|
|
- gcc_assert (vec_def_stmt_info);
|
|
- if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
|
|
- vec_oprnd = gimple_phi_result (vec_def_phi);
|
|
- else
|
|
- vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
|
|
- vec_oprnds->quick_push (vec_oprnd);
|
|
- }
|
|
+ vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
|
|
}
|
|
|
|
|
|
-/* Get vectorized definitions for SLP_NODE.
|
|
+/* Get N vectorized definitions for SLP_NODE.
|
|
If the scalar definitions are loop invariants or constants, collect them and
|
|
call vect_get_constant_vectors() to create vector stmts.
|
|
Otherwise, the def-stmts must be already vectorized and the vectorized stmts
|
|
@@ -3574,91 +3730,26 @@ vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
|
|
vect_get_slp_vect_defs () to retrieve them. */
|
|
|
|
void
|
|
-vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
|
|
- vec<vec<tree> > *vec_oprnds)
|
|
+vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
|
|
{
|
|
- int number_of_vects = 0, i;
|
|
- unsigned int child_index = 0;
|
|
- HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
|
|
- slp_tree child = NULL;
|
|
- vec<tree> vec_defs;
|
|
- tree oprnd;
|
|
- bool vectorized_defs;
|
|
+ if (n == -1U)
|
|
+ n = SLP_TREE_CHILDREN (slp_node).length ();
|
|
|
|
- stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
|
|
- FOR_EACH_VEC_ELT (ops, i, oprnd)
|
|
+ for (unsigned i = 0; i < n; ++i)
|
|
{
|
|
- /* For each operand we check if it has vectorized definitions in a child
|
|
- node or we need to create them (for invariants and constants). We
|
|
- check if the LHS of the first stmt of the next child matches OPRND.
|
|
- If it does, we found the correct child. Otherwise, we call
|
|
- vect_get_constant_vectors (), and not advance CHILD_INDEX in order
|
|
- to check this child node for the next operand. */
|
|
- vectorized_defs = false;
|
|
- if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
|
|
- {
|
|
- child = SLP_TREE_CHILDREN (slp_node)[child_index];
|
|
-
|
|
- /* We have to check both pattern and original def, if available. */
|
|
- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
|
- {
|
|
- stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
|
|
- stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
|
|
- tree first_def_op;
|
|
-
|
|
- if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
|
|
- first_def_op = gimple_phi_result (first_def);
|
|
- else
|
|
- first_def_op = gimple_get_lhs (first_def_info->stmt);
|
|
- if (operand_equal_p (oprnd, first_def_op, 0)
|
|
- || (related
|
|
- && operand_equal_p (oprnd,
|
|
- gimple_get_lhs (related->stmt), 0)))
|
|
- {
|
|
- /* The number of vector defs is determined by the number of
|
|
- vector statements in the node from which we get those
|
|
- statements. */
|
|
- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
|
|
- vectorized_defs = true;
|
|
- child_index++;
|
|
- }
|
|
- }
|
|
- else
|
|
- child_index++;
|
|
- }
|
|
-
|
|
- if (!vectorized_defs)
|
|
- {
|
|
- if (i == 0)
|
|
- {
|
|
- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
|
- /* Number of vector stmts was calculated according to LHS in
|
|
- vect_schedule_slp_instance (), fix it by replacing LHS with
|
|
- RHS, if necessary. See vect_get_smallest_scalar_type () for
|
|
- details. */
|
|
- vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
|
|
- &rhs_size_unit);
|
|
- if (rhs_size_unit != lhs_size_unit)
|
|
- {
|
|
- number_of_vects *= rhs_size_unit;
|
|
- number_of_vects /= lhs_size_unit;
|
|
- }
|
|
- }
|
|
- }
|
|
+ slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
|
|
|
|
- /* Allocate memory for vectorized defs. */
|
|
- vec_defs = vNULL;
|
|
- vec_defs.create (number_of_vects);
|
|
+ vec<tree> vec_defs = vNULL;
|
|
|
|
- /* For reduction defs we call vect_get_constant_vectors (), since we are
|
|
- looking for initial loop invariant values. */
|
|
- if (vectorized_defs)
|
|
- /* The defs are already vectorized. */
|
|
- vect_get_slp_vect_defs (child, &vec_defs);
|
|
+ /* For each operand we check if it has vectorized definitions in a child
|
|
+ node or we need to create them (for invariants and constants). */
|
|
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
|
+ {
|
|
+ vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
|
|
+ vect_get_slp_vect_defs (child, &vec_defs);
|
|
+ }
|
|
else
|
|
- /* Build vectors from scalar defs. */
|
|
- vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
|
|
- number_of_vects);
|
|
+ vect_get_constant_vectors (child, slp_node, &vec_defs);
|
|
|
|
vec_oprnds->quick_push (vec_defs);
|
|
}
|
|
@@ -3939,17 +4030,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
|
stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
|
|
si = gsi_for_stmt (last_stmt_info->stmt);
|
|
|
|
- /* Mark the first element of the reduction chain as reduction to properly
|
|
- transform the node. In the analysis phase only the last element of the
|
|
- chain is marked as reduction. */
|
|
- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
|
- && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
|
- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
|
|
- {
|
|
- STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
|
|
- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
|
|
- }
|
|
-
|
|
/* Handle two-operation SLP nodes by vectorizing the group with
both operations and then performing a merge. */
if (SLP_TREE_TWO_OPERATORS (node))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 74abfbfe56e..5d6da3d9708 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -329,13 +329,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
|
|
basic_block bb = gimple_bb (USE_STMT (use_p));
|
|
if (!flow_bb_inside_loop_p (loop, bb))
|
|
{
|
|
+ if (is_gimple_debug (USE_STMT (use_p)))
|
|
+ continue;
|
|
+
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"vec_stmt_relevant_p: used out of loop.\n");
|
|
|
|
- if (is_gimple_debug (USE_STMT (use_p)))
|
|
- continue;
|
|
-
|
|
/* We expect all such uses to be in the loop exit phis
|
|
(because of loop closed form) */
|
|
gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
|
|
@@ -456,7 +456,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
|
|
bool force)
|
|
{
|
|
stmt_vec_info dstmt_vinfo;
|
|
- basic_block bb, def_bb;
|
|
enum vect_def_type dt;
|
|
|
|
/* case 1: we are only interested in uses that need to be vectorized. Uses
|
|
@@ -472,28 +471,8 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
|
|
if (!dstmt_vinfo)
|
|
return opt_result::success ();
|
|
|
|
- def_bb = gimple_bb (dstmt_vinfo->stmt);
|
|
-
|
|
- /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
|
|
- DSTMT_VINFO must have already been processed, because this should be the
|
|
- only way that STMT, which is a reduction-phi, was put in the worklist,
|
|
- as there should be no other uses for DSTMT_VINFO in the loop. So we just
|
|
- check that everything is as expected, and we are done. */
|
|
- bb = gimple_bb (stmt_vinfo->stmt);
|
|
- if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
|
|
- && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
|
|
- && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
|
|
- && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
|
|
- && bb->loop_father == def_bb->loop_father)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "reduc-stmt defining reduc-phi in the same nest.\n");
|
|
- gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
|
|
- gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
|
|
- || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
|
|
- return opt_result::success ();
|
|
- }
|
|
+ basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
|
|
+ basic_block bb = gimple_bb (stmt_vinfo->stmt);
|
|
|
|
/* case 3a: outer-loop stmt defining an inner-loop stmt:
|
|
outer-loop-header-bb:
|
|
@@ -607,7 +586,7 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
|
|
This pass detects such stmts. */
|
|
|
|
opt_result
|
|
-vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
|
+vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
|
|
{
|
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
|
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
|
|
@@ -777,7 +756,11 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
|
= process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
|
|
&worklist, true);
|
|
if (!res)
|
|
- return res;
|
|
+ {
|
|
+ if (fatal)
|
|
+ *fatal = false;
|
|
+ return res;
|
|
+ }
|
|
}
|
|
} /* while worklist */
|
|
|
|
@@ -791,6 +774,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
|
|
unsigned opno, enum vect_def_type dt,
|
|
stmt_vector_for_cost *cost_vec)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
|
|
tree op = gimple_op (stmt, opno);
|
|
unsigned prologue_cost = 0;
|
|
@@ -798,7 +782,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
|
|
/* Without looking at the actual initializer a vector of
|
|
constants can be implemented as load from the constant pool.
|
|
When all elements are the same we can use a splat. */
|
|
- tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
|
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
|
|
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
|
unsigned num_vects_to_check;
|
|
unsigned HOST_WIDE_INT const_nunits;
|
|
@@ -1603,9 +1587,9 @@ vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
|
|
vector_type = vectype;
|
|
else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
|
|
&& VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
|
|
- vector_type = build_same_sized_truth_vector_type (stmt_vectype);
|
|
+ vector_type = truth_type_for (stmt_vectype);
|
|
else
|
|
- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
|
|
+ vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
|
|
|
|
gcc_assert (vector_type);
|
|
return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
|
|
@@ -1720,16 +1704,8 @@ vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
|
|
{
|
|
if (slp_node)
|
|
{
|
|
- int nops = (op1 == NULL_TREE) ? 1 : 2;
|
|
- auto_vec<tree> ops (nops);
|
|
- auto_vec<vec<tree> > vec_defs (nops);
|
|
-
|
|
- ops.quick_push (op0);
|
|
- if (op1)
|
|
- ops.quick_push (op1);
|
|
-
|
|
- vect_get_slp_defs (ops, slp_node, &vec_defs);
|
|
-
|
|
+ auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
|
|
+ vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
|
|
*vec_oprnds0 = vec_defs[0];
|
|
if (op1)
|
|
*vec_oprnds1 = vec_defs[1];
|
|
@@ -1874,7 +1850,8 @@ static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
|
|
says how the load or store is going to be implemented and GROUP_SIZE
|
|
is the number of load or store statements in the containing group.
|
|
If the access is a gather load or scatter store, GS_INFO describes
|
|
- its arguments.
|
|
+ its arguments. If the load or store is conditional, SCALAR_MASK is the
|
|
+ condition under which it occurs.
|
|
|
|
Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
|
|
supported, otherwise record the required mask types. */
|
|
@@ -1883,7 +1860,7 @@ static void
|
|
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
|
|
vec_load_store_type vls_type, int group_size,
|
|
vect_memory_access_type memory_access_type,
|
|
- gather_scatter_info *gs_info)
|
|
+ gather_scatter_info *gs_info, tree scalar_mask)
|
|
{
|
|
/* Invariant loads need no special support. */
|
|
if (memory_access_type == VMAT_INVARIANT)
|
|
@@ -1907,7 +1884,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
|
|
return;
|
|
}
|
|
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
|
|
- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
|
|
+ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
|
|
return;
|
|
}
|
|
|
|
@@ -1931,7 +1908,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
|
|
return;
|
|
}
|
|
unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
|
|
- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
|
|
+ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
|
|
return;
|
|
}
|
|
|
|
@@ -1949,9 +1926,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
|
|
}
|
|
|
|
machine_mode mask_mode;
|
|
- if (!(targetm.vectorize.get_mask_mode
|
|
- (GET_MODE_NUNITS (vecmode),
|
|
- GET_MODE_SIZE (vecmode)).exists (&mask_mode))
|
|
+ if (!VECTOR_MODE_P (vecmode)
|
|
+ || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
|
|
|| !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -1969,7 +1945,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
|
|
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
|
unsigned int nvectors;
|
|
if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
|
|
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
|
|
+ vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
@@ -2311,6 +2287,29 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
|
|
&& gap < (vect_known_alignment_in_bytes (first_dr_info)
|
|
/ vect_get_scalar_dr_size (first_dr_info)))
|
|
overrun_p = false;
|
|
+
|
|
+ /* If the gap splits the vector in half and the target
|
|
+ can do half-vector operations avoid the epilogue peeling
|
|
+ by simply loading half of the vector only. Usually
|
|
+ the construction with an upper zero half will be elided. */
|
|
+ dr_alignment_support alignment_support_scheme;
|
|
+ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
|
|
+ machine_mode vmode;
|
|
+ if (overrun_p
|
|
+ && !masked_p
|
|
+ && (((alignment_support_scheme
|
|
+ = vect_supportable_dr_alignment (first_dr_info, false)))
|
|
+ == dr_aligned
|
|
+ || alignment_support_scheme == dr_unaligned_supported)
|
|
+ && known_eq (nunits, (group_size - gap) * 2)
|
|
+ && known_eq (nunits, group_size)
|
|
+ && related_vector_mode (TYPE_MODE (vectype), elmode,
|
|
+ group_size - gap).exists (&vmode)
|
|
+ && (convert_optab_handler (vec_init_optab,
|
|
+ TYPE_MODE (vectype), vmode)
|
|
+ != CODE_FOR_nothing))
|
|
+ overrun_p = false;
|
|
+
|
|
if (overrun_p && !can_overrun_p)
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -2536,6 +2535,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
|
|
vect_def_type *mask_dt_out,
|
|
tree *mask_vectype_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -2564,7 +2564,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
|
|
|
|
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
if (!mask_vectype)
|
|
- mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
|
|
+ mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
|
|
|
|
if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
|
|
{
|
|
@@ -2728,7 +2728,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|
|| TREE_CODE (masktype) == INTEGER_TYPE
|
|
|| types_compatible_p (srctype, masktype)));
|
|
if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
|
|
- masktype = build_same_sized_truth_vector_type (srctype);
|
|
+ masktype = truth_type_for (srctype);
|
|
|
|
tree mask_halftype = masktype;
|
|
tree perm_mask = NULL_TREE;
|
|
@@ -2774,8 +2774,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|
mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
|
|
}
|
|
else if (mask)
|
|
- mask_halftype
|
|
- = build_same_sized_truth_vector_type (gs_info->offset_vectype);
|
|
+ mask_halftype = truth_type_for (gs_info->offset_vectype);
|
|
}
|
|
else
|
|
gcc_unreachable ();
|
|
@@ -2952,6 +2951,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
|
|
gather_scatter_info *gs_info,
|
|
tree *dataref_ptr, tree *vec_offset)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple_seq stmts = NULL;
|
|
*dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
|
|
if (stmts != NULL)
|
|
@@ -2962,7 +2962,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
|
|
gcc_assert (!new_bb);
|
|
}
|
|
tree offset_type = TREE_TYPE (gs_info->offset);
|
|
- tree offset_vectype = get_vectype_for_scalar_type (offset_type);
|
|
+ tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
|
|
*vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
|
|
offset_vectype);
|
|
}
|
|
@@ -2997,7 +2997,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
|
|
/* The offset given in GS_INFO can have pointer type, so use the element
|
|
type of the vector instead. */
|
|
tree offset_type = TREE_TYPE (gs_info->offset);
|
|
- tree offset_vectype = get_vectype_for_scalar_type (offset_type);
|
|
+ tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type);
|
|
offset_type = TREE_TYPE (offset_vectype);
|
|
|
|
/* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
|
|
@@ -3161,8 +3161,7 @@ simple_integer_narrowing (tree vectype_out, tree vectype_in,
|
|
int multi_step_cvt = 0;
|
|
auto_vec <tree, 8> interm_types;
|
|
if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
|
|
- &code, &multi_step_cvt,
|
|
- &interm_types)
|
|
+ &code, &multi_step_cvt, &interm_types)
|
|
|| multi_step_cvt)
|
|
return false;
|
|
|
|
@@ -3295,10 +3294,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
return false;
|
|
}
|
|
}
|
|
- /* If all arguments are external or constant defs use a vector type with
|
|
- the same size as the output vector type. */
|
|
+ /* If all arguments are external or constant defs, infer the vector type
|
|
+ from the scalar type. */
|
|
if (!vectype_in)
|
|
- vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
|
|
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
|
|
if (vec_stmt)
|
|
gcc_assert (vectype_in);
|
|
if (!vectype_in)
|
|
@@ -3309,6 +3308,19 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
|
|
return false;
|
|
}
|
|
+ /* FORNOW: we don't yet support mixtures of vector sizes for calls,
|
|
+ just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
|
|
+ are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
|
|
+ by a pack of the two vectors into an SI vector. We would need
|
|
+ separate code to handle direct VnDI->VnSI IFN_CTZs. */
|
|
+ if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "mismatched vector sizes %T and %T\n",
|
|
+ vectype_in, vectype_out);
|
|
+ return false;
|
|
+ }
|
|
|
|
/* FORNOW */
|
|
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
|
|
@@ -3415,7 +3427,9 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
unsigned int nvectors = (slp_node
|
|
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
|
|
: ncopies);
|
|
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
|
|
+ tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
|
|
+ vect_record_loop_mask (loop_vinfo, masks, nvectors,
|
|
+ vectype_out, scalar_mask);
|
|
}
|
|
return true;
|
|
}
|
|
@@ -3446,9 +3460,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
auto_vec<vec<tree> > vec_defs (nargs);
|
|
vec<tree> vec_oprnds0;
|
|
|
|
- for (i = 0; i < nargs; i++)
|
|
- vargs[i] = gimple_call_arg (stmt, i);
|
|
- vect_get_slp_defs (vargs, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds0 = vec_defs[0];
|
|
|
|
/* Arguments are ready. Create the new vector stmt. */
|
|
@@ -3470,8 +3482,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
= gimple_build_call_internal_vec (ifn, vargs);
|
|
gimple_call_set_lhs (call, half_res);
|
|
gimple_call_set_nothrow (call, true);
|
|
- new_stmt_info
|
|
- = vect_finish_stmt_generation (stmt_info, call, gsi);
|
|
+ vect_finish_stmt_generation (stmt_info, call, gsi);
|
|
if ((i & 1) == 0)
|
|
{
|
|
prev_res = half_res;
|
|
@@ -3523,8 +3534,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (mask_opno >= 0 && !vectypes[mask_opno])
|
|
{
|
|
gcc_assert (modifier != WIDEN);
|
|
- vectypes[mask_opno]
|
|
- = build_same_sized_truth_vector_type (vectype_in);
|
|
+ vectypes[mask_opno] = truth_type_for (vectype_in);
|
|
}
|
|
|
|
for (i = 0; i < nargs; i++)
|
|
@@ -3570,8 +3580,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
gcall *call = gimple_build_call_internal_vec (ifn, vargs);
|
|
gimple_call_set_lhs (call, half_res);
|
|
gimple_call_set_nothrow (call, true);
|
|
- new_stmt_info
|
|
- = vect_finish_stmt_generation (stmt_info, call, gsi);
|
|
+ vect_finish_stmt_generation (stmt_info, call, gsi);
|
|
if ((j & 1) == 0)
|
|
{
|
|
prev_res = half_res;
|
|
@@ -3622,9 +3631,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
auto_vec<vec<tree> > vec_defs (nargs);
|
|
vec<tree> vec_oprnds0;
|
|
|
|
- for (i = 0; i < nargs; i++)
|
|
- vargs.quick_push (gimple_call_arg (stmt, i));
|
|
- vect_get_slp_defs (vargs, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds0 = vec_defs[0];
|
|
|
|
/* Arguments are ready. Create the new vector stmt. */
|
|
@@ -4087,9 +4094,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
|
|
|| arginfo[i].dt == vect_external_def)
|
|
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
|
|
{
|
|
- arginfo[i].vectype
|
|
- = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
|
|
- i)));
|
|
+ tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
|
|
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
|
|
if (arginfo[i].vectype == NULL
|
|
|| (simd_clone_subparts (arginfo[i].vectype)
|
|
> bestn->simdclone->simdlen))
|
|
@@ -4802,10 +4808,10 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
}
|
|
|
|
- /* If op0 is an external or constant defs use a vector type of
|
|
- the same size as the output vector type. */
|
|
+ /* If op0 is an external or constant def, infer the vector type
|
|
+ from the scalar type. */
|
|
if (!vectype_in)
|
|
- vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
|
|
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
|
|
if (vec_stmt)
|
|
gcc_assert (vectype_in);
|
|
if (!vectype_in)
|
|
@@ -4863,7 +4869,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
switch (modifier)
|
|
{
|
|
case NONE:
|
|
- if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
|
|
+ if (code != FIX_TRUNC_EXPR
|
|
+ && code != FLOAT_EXPR
|
|
+ && !CONVERT_EXPR_CODE_P (code))
|
|
return false;
|
|
if (supportable_convert_operation (code, vectype_out, vectype_in,
|
|
&decl1, &code1))
|
|
@@ -5452,7 +5460,7 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
either as shift by a scalar or by a vector. */
|
|
|
|
bool
|
|
-vect_supportable_shift (enum tree_code code, tree scalar_type)
|
|
+vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{

  machine_mode vec_mode;
@@ -5460,7 +5468,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
  int icode;
  tree vectype;

-  vectype = get_vectype_for_scalar_type (scalar_type);
+  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

@@ -5491,7 +5499,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

-bool
+static bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)
@@ -5524,6 +5532,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
+  bool incompatible_op1_vectype_p = false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;
@@ -5565,10 +5574,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         "use not simple.\n");
      return false;
    }
-  /* If op0 is an external or constant def use a vector type with
-     the same size as the output vector type.  */
+  /* If op0 is an external or constant def, infer the vector type
+     from the scalar type.  */
  if (!vectype)
-    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+    vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
@@ -5666,9 +5675,16 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
-        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
-      if (op1_vectype == NULL_TREE
-          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+        op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
+      incompatible_op1_vectype_p
+        = (op1_vectype == NULL_TREE
+           || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
+                        TYPE_VECTOR_SUBPARTS (vectype))
+           || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
+      if (incompatible_op1_vectype_p
+          && (!slp_node
+              || SLP_TREE_DEF_TYPE
+                   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5707,7 +5723,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
             so make sure the scalar is the right type if we are
             dealing with vectors of long long/long/short/char.  */
          if (dt[1] == vect_constant_def)
-            op1 = fold_convert (TREE_TYPE (vectype), op1);
+            {
+              if (!slp_node)
+                op1 = fold_convert (TREE_TYPE (vectype), op1);
+            }
          else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                               TREE_TYPE (op1)))
            {
@@ -5818,6 +5837,21 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            }
        }
    }
+  else if (slp_node && incompatible_op1_vectype_p)
+    {
+      /* Convert the scalar constant shift amounts in-place.  */
+      slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
+      gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
+      for (unsigned i = 0;
+           i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
+        {
+          SLP_TREE_SCALAR_OPS (shift)[i]
+              = fold_convert (TREE_TYPE (vectype),
+                              SLP_TREE_SCALAR_OPS (shift)[i]);
+          gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
+                       == INTEGER_CST));
+        }
+    }

  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
|
|
@@ -5894,7 +5928,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
-  int ncopies;
+  int ncopies, vec_num;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
@@ -5964,8 +5998,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                         "use not simple.\n");
      return false;
    }
-  /* If op0 is an external or constant def use a vector type with
-     the same size as the output vector type.  */
+  /* If op0 is an external or constant def, infer the vector type
+     from the scalar type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
@@ -5985,7 +6019,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          vectype = vectype_out;
        }
      else
-        vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+        vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
    }
  if (vec_stmt)
    gcc_assert (vectype);
@@ -6031,9 +6065,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
-    ncopies = 1;
+    {
+      ncopies = 1;
+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+    }
  else
-    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+    {
+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vec_num = 1;
+    }
|
|
|
|
gcc_assert (ncopies >= 1);
|
|
|
|
@@ -6086,8 +6126,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
return false;
|
|
}
|
|
|
|
+ int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
|
|
+ vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
|
|
+ internal_fn cond_fn = get_conditional_internal_fn (code);
|
|
+
|
|
if (!vec_stmt) /* transformation not required. */
|
|
{
|
|
+ /* If this operation is part of a reduction, a fully-masked loop
|
|
+ should only change the active lanes of the reduction chain,
|
|
+ keeping the inactive lanes as-is. */
|
|
+ if (loop_vinfo
|
|
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
|
|
+ && reduc_idx >= 0)
|
|
+ {
|
|
+ if (cond_fn == IFN_LAST
|
|
+ || !direct_internal_fn_supported_p (cond_fn, vectype,
|
|
+ OPTIMIZE_FOR_SPEED))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "can't use a fully-masked loop because no"
|
|
+ " conditional operation is available.\n");
|
|
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
|
+ }
|
|
+ else
|
|
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
|
|
+ vectype, NULL);
|
|
+ }
|
|
+
|
|
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
|
|
DUMP_VECT_SCOPE ("vectorizable_operation");
|
|
vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
|
|
@@ -6100,6 +6166,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
"transform binary/unary operation.\n");
|
|
|
|
+ bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
|
|
+
|
|
/* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
|
|
vectors with unsigned elements, but the result is signed. So, we
|
|
need to compute the MINUS_EXPR into vectype temporary and
|
|
@@ -6180,12 +6248,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
{
|
|
if (slp_node)
|
|
{
|
|
- auto_vec<tree> ops(3);
|
|
- ops.quick_push (op0);
|
|
- ops.quick_push (op1);
|
|
- ops.quick_push (op2);
|
|
auto_vec<vec<tree> > vec_defs(3);
|
|
- vect_get_slp_defs (ops, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds0 = vec_defs[0];
|
|
vec_oprnds1 = vec_defs[1];
|
|
vec_oprnds2 = vec_defs[2];
|
|
@@ -6221,22 +6285,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
? vec_oprnds1[i] : NULL_TREE);
|
|
vop2 = ((op_type == ternary_op)
|
|
? vec_oprnds2[i] : NULL_TREE);
|
|
- gassign *new_stmt = gimple_build_assign (vec_dest, code,
|
|
- vop0, vop1, vop2);
|
|
- new_temp = make_ssa_name (vec_dest, new_stmt);
|
|
- gimple_assign_set_lhs (new_stmt, new_temp);
|
|
- new_stmt_info
|
|
- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
- if (vec_cvt_dest)
|
|
+ if (masked_loop_p && reduc_idx >= 0)
|
|
{
|
|
- new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
|
|
- gassign *new_stmt
|
|
- = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
|
|
- new_temp);
|
|
- new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
|
|
+ /* Perform the operation on active elements only and take
|
|
+ inactive elements from the reduction chain input. */
|
|
+ gcc_assert (!vop2);
|
|
+ vop2 = reduc_idx == 1 ? vop1 : vop0;
|
|
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
|
|
+ vectype, i * ncopies + j);
|
|
+ gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
|
|
+ vop0, vop1, vop2);
|
|
+ new_temp = make_ssa_name (vec_dest, call);
|
|
+ gimple_call_set_lhs (call, new_temp);
|
|
+ gimple_call_set_nothrow (call, true);
|
|
+ new_stmt_info
|
|
+ = vect_finish_stmt_generation (stmt_info, call, gsi);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ gassign *new_stmt = gimple_build_assign (vec_dest, code,
|
|
+ vop0, vop1, vop2);
|
|
+ new_temp = make_ssa_name (vec_dest, new_stmt);
|
|
gimple_assign_set_lhs (new_stmt, new_temp);
|
|
new_stmt_info
|
|
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
+ if (vec_cvt_dest)
|
|
+ {
|
|
+ new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
|
|
+ gassign *new_stmt
|
|
+ = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
|
|
+ new_temp);
|
|
+ new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
|
|
+ gimple_assign_set_lhs (new_stmt, new_temp);
|
|
+ new_stmt_info
|
|
+ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
+ }
|
|
}
|
|
if (slp_node)
|
|
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
|
|
@@ -6517,7 +6600,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (loop_vinfo
|
|
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
|
|
check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
|
|
- memory_access_type, &gs_info);
|
|
+ memory_access_type, &gs_info, mask);
|
|
|
|
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
|
|
vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
|
|
@@ -6580,8 +6663,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
ncopies *= 2;
|
|
|
|
if (mask)
|
|
- mask_halfvectype
|
|
- = build_same_sized_truth_vector_type (gs_info.offset_vectype);
|
|
+ mask_halfvectype = truth_type_for (gs_info.offset_vectype);
|
|
}
|
|
else
|
|
gcc_unreachable ();
|
|
@@ -6840,9 +6922,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
of vector elts directly. */
|
|
scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
|
|
machine_mode vmode;
|
|
- if (!mode_for_vector (elmode, group_size).exists (&vmode)
|
|
- || !VECTOR_MODE_P (vmode)
|
|
- || !targetm.vector_mode_supported_p (vmode)
|
|
+ if (!related_vector_mode (TYPE_MODE (vectype), elmode,
|
|
+ group_size).exists (&vmode)
|
|
|| (convert_optab_handler (vec_extract_optab,
|
|
TYPE_MODE (vectype), vmode)
|
|
== CODE_FOR_nothing))
|
|
@@ -6859,9 +6940,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
element extracts from the original vector type and
|
|
element size stores. */
|
|
if (int_mode_for_size (lsize, 0).exists (&elmode)
|
|
- && mode_for_vector (elmode, lnunits).exists (&vmode)
|
|
- && VECTOR_MODE_P (vmode)
|
|
- && targetm.vector_mode_supported_p (vmode)
|
|
+ && related_vector_mode (TYPE_MODE (vectype), elmode,
|
|
+ lnunits).exists (&vmode)
|
|
&& (convert_optab_handler (vec_extract_optab,
|
|
vmode, elmode)
|
|
!= CODE_FOR_nothing))
|
|
@@ -7624,14 +7704,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (!scalar_dest)
|
|
return false;
|
|
|
|
- if (slp_node != NULL)
|
|
- {
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
- "SLP of masked loads not supported.\n");
|
|
- return false;
|
|
- }
|
|
-
|
|
int mask_index = internal_fn_mask_index (ifn);
|
|
if (mask_index >= 0)
|
|
{
|
|
@@ -7714,6 +7786,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
|
|
group_size = DR_GROUP_SIZE (first_stmt_info);
|
|
|
|
+ /* Refuse non-SLP vectorization of SLP-only groups. */
|
|
+ if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "cannot vectorize load in non-SLP mode.\n");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
|
|
slp_perm = true;
|
|
|
|
@@ -7767,7 +7848,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
if (loop_vinfo
|
|
&& LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
|
|
check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
|
|
- memory_access_type, &gs_info);
|
|
+ memory_access_type, &gs_info, mask);
|
|
|
|
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
|
|
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
|
|
@@ -7947,9 +8028,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
vector elts directly. */
|
|
scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
|
|
machine_mode vmode;
|
|
- if (mode_for_vector (elmode, group_size).exists (&vmode)
|
|
- && VECTOR_MODE_P (vmode)
|
|
- && targetm.vector_mode_supported_p (vmode)
|
|
+ if (related_vector_mode (TYPE_MODE (vectype), elmode,
|
|
+ group_size).exists (&vmode)
|
|
&& (convert_optab_handler (vec_init_optab,
|
|
TYPE_MODE (vectype), vmode)
|
|
!= CODE_FOR_nothing))
|
|
@@ -7973,9 +8053,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
/* If we can't construct such a vector fall back to
|
|
element loads of the original vector type. */
|
|
if (int_mode_for_size (lsize, 0).exists (&elmode)
|
|
- && mode_for_vector (elmode, lnunits).exists (&vmode)
|
|
- && VECTOR_MODE_P (vmode)
|
|
- && targetm.vector_mode_supported_p (vmode)
|
|
+ && related_vector_mode (TYPE_MODE (vectype), elmode,
|
|
+ lnunits).exists (&vmode)
|
|
&& (convert_optab_handler (vec_init_optab, vmode, elmode)
|
|
!= CODE_FOR_nothing))
|
|
{
|
|
@@ -8413,8 +8492,17 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
simd_lane_access_p,
|
|
byte_offset, bump);
|
|
if (mask)
|
|
- vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
|
|
- mask_vectype);
|
|
+ {
|
|
+ if (slp_node)
|
|
+ {
|
|
+ auto_vec<vec<tree> > vec_defs (1);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
+ vec_mask = vec_defs[0][0];
|
|
+ }
|
|
+ else
|
|
+ vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
|
|
+ mask_vectype);
|
|
+ }
|
|
}
|
|
else
|
|
{
|
|
@@ -8564,8 +8652,25 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
else
|
|
{
|
|
+ tree ltype = vectype;
|
|
+ /* If there's no peeling for gaps but we have a gap
|
|
+ with slp loads then load the lower half of the
|
|
+ vector only. See get_group_load_store_type for
|
|
+ when we apply this optimization. */
|
|
+ if (slp
|
|
+ && loop_vinfo
|
|
+ && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
|
+ && DR_GROUP_GAP (first_stmt_info) != 0
|
|
+ && known_eq (nunits,
|
|
+ (group_size
|
|
+ - DR_GROUP_GAP (first_stmt_info)) * 2)
|
|
+ && known_eq (nunits, group_size))
|
|
+ ltype = build_vector_type (TREE_TYPE (vectype),
|
|
+ (group_size
|
|
+ - DR_GROUP_GAP
|
|
+ (first_stmt_info)));
|
|
data_ref
|
|
- = fold_build2 (MEM_REF, vectype, dataref_ptr,
|
|
+ = fold_build2 (MEM_REF, ltype, dataref_ptr,
|
|
dataref_offset
|
|
? dataref_offset
|
|
: build_int_cst (ref_type, 0));
|
|
@@ -8579,6 +8684,23 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
TREE_TYPE (data_ref)
|
|
= build_aligned_type (TREE_TYPE (data_ref),
|
|
TYPE_ALIGN (elem_type));
|
|
+ if (ltype != vectype)
|
|
+ {
|
|
+ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
|
|
+ tree tem = make_ssa_name (ltype);
|
|
+ new_stmt = gimple_build_assign (tem, data_ref);
|
|
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
+ data_ref = NULL;
|
|
+ vec<constructor_elt, va_gc> *v;
|
|
+ vec_alloc (v, 2);
|
|
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
|
|
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
|
|
+ build_zero_cst (ltype));
|
|
+ new_stmt
|
|
+ = gimple_build_assign (vec_dest,
|
|
+ build_constructor
|
|
+ (vectype, v));
|
|
+ }
|
|
}
|
|
break;
|
|
}
|
|
@@ -8864,7 +8986,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
|
|
scalar_type = build_nonstandard_integer_type
|
|
(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
|
|
TYPE_UNSIGNED (scalar_type));
|
|
- *comp_vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
}
|
|
|
|
return true;
|
|
@@ -8881,9 +9003,9 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
|
|
|
|
Return true if STMT_INFO is vectorizable in this way. */
|
|
|
|
-bool
|
|
+static bool
|
|
vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
- stmt_vec_info *vec_stmt, bool for_reduction,
|
|
+ stmt_vec_info *vec_stmt,
|
|
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
|
|
{
|
|
vec_info *vinfo = stmt_info->vinfo;
|
|
@@ -8913,22 +9035,39 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
tree vec_cmp_type;
|
|
bool masked = false;
|
|
|
|
- if (for_reduction && STMT_SLP_TYPE (stmt_info))
|
|
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
|
+ return false;
|
|
+
|
|
+ /* Is vectorizable conditional operation? */
|
|
+ gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
|
|
+ if (!stmt)
|
|
+ return false;
|
|
+
|
|
+ code = gimple_assign_rhs_code (stmt);
|
|
+ if (code != COND_EXPR)
|
|
return false;
|
|
|
|
- vect_reduction_type reduction_type
|
|
- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
|
|
- if (reduction_type == TREE_CODE_REDUCTION)
|
|
+ stmt_vec_info reduc_info = NULL;
|
|
+ int reduc_index = -1;
|
|
+ vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
|
|
+ bool for_reduction
|
|
+ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
|
|
+ if (for_reduction)
|
|
{
|
|
- if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
|
+ if (STMT_SLP_TYPE (stmt_info))
|
|
return false;
|
|
-
|
|
- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
|
|
- && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|
|
- && for_reduction))
|
|
+ reduc_info = info_for_reduction (stmt_info);
|
|
+ reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
|
|
+ reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
|
|
+ gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
|
|
+ || reduc_index != -1);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
|
|
return false;
|
|
|
|
- /* FORNOW: not yet supported. */
|
|
+ /* FORNOW: only supported as part of a reduction. */
|
|
if (STMT_VINFO_LIVE_P (stmt_info))
|
|
{
|
|
if (dump_enabled_p ())
|
|
@@ -8938,16 +9077,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
}
|
|
|
|
- /* Is vectorizable conditional operation? */
|
|
- gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
|
|
- if (!stmt)
|
|
- return false;
|
|
-
|
|
- code = gimple_assign_rhs_code (stmt);
|
|
-
|
|
- if (code != COND_EXPR)
|
|
- return false;
|
|
-
|
|
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
|
|
|
|
@@ -8981,7 +9110,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
return false;
|
|
|
|
masked = !COMPARISON_CLASS_P (cond_expr);
|
|
- vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
|
|
+ vec_cmp_type = truth_type_for (comp_vectype);
|
|
|
|
if (vec_cmp_type == NULL_TREE)
|
|
return false;
|
|
@@ -8993,6 +9122,29 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
cond_expr1 = TREE_OPERAND (cond_expr, 1);
|
|
}
|
|
|
|
+ /* For conditional reductions, the "then" value needs to be the candidate
|
|
+ value calculated by this iteration while the "else" value needs to be
|
|
+ the result carried over from previous iterations. If the COND_EXPR
|
|
+ is the other way around, we need to swap it. */
|
|
+ bool must_invert_cmp_result = false;
|
|
+ if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
|
|
+ {
|
|
+ if (masked)
|
|
+ must_invert_cmp_result = true;
|
|
+ else
|
|
+ {
|
|
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
|
|
+ tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
|
|
+ if (new_code == ERROR_MARK)
|
|
+ must_invert_cmp_result = true;
|
|
+ else
|
|
+ cond_code = new_code;
|
|
+ }
|
|
+ /* Make sure we don't accidentally use the old condition. */
|
|
+ cond_expr = NULL_TREE;
|
|
+ std::swap (then_clause, else_clause);
|
|
+ }
|
|
+
|
|
if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
|
|
{
|
|
/* Boolean values may have another representation in vectors
|
|
@@ -9053,6 +9205,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
return false;
|
|
}
|
|
}
|
|
+ if (loop_vinfo
|
|
+ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
|
|
+ && reduction_type == EXTRACT_LAST_REDUCTION)
|
|
+ {
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
+ "can't yet use a fully-masked loop for"
|
|
+ " EXTRACT_LAST_REDUCTION.\n");
|
|
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
|
+ }
|
|
if (expand_vec_cond_expr_p (vectype, comp_vectype,
|
|
cond_code))
|
|
{
|
|
@@ -9082,24 +9244,42 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
/* Handle cond expr. */
|
|
for (j = 0; j < ncopies; j++)
|
|
{
|
|
+ tree loop_mask = NULL_TREE;
|
|
+ bool swap_cond_operands = false;
|
|
+
|
|
+ /* See whether another part of the vectorized code applies a loop
|
|
+ mask to the condition, or to its inverse. */
|
|
+
|
|
+ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
|
|
+ {
|
|
+ scalar_cond_masked_key cond (cond_expr, ncopies);
|
|
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
|
|
+ {
|
|
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
|
|
+ loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
|
|
+ cond.code = invert_tree_comparison (cond.code, honor_nans);
|
|
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond))
|
|
+ {
|
|
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
|
|
+ loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
|
|
+ vectype, j);
|
|
+ cond_code = cond.code;
|
|
+ swap_cond_operands = true;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
stmt_vec_info new_stmt_info = NULL;
|
|
if (j == 0)
|
|
{
|
|
if (slp_node)
|
|
{
|
|
- auto_vec<tree, 4> ops;
|
|
auto_vec<vec<tree>, 4> vec_defs;
|
|
-
|
|
- if (masked)
|
|
- ops.safe_push (cond_expr);
|
|
- else
|
|
- {
|
|
- ops.safe_push (cond_expr0);
|
|
- ops.safe_push (cond_expr1);
|
|
- }
|
|
- ops.safe_push (then_clause);
|
|
- ops.safe_push (else_clause);
|
|
- vect_get_slp_defs (ops, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds3 = vec_defs.pop ();
|
|
vec_oprnds2 = vec_defs.pop ();
|
|
if (!masked)
|
|
@@ -9159,6 +9339,9 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
vec_then_clause = vec_oprnds2[i];
|
|
vec_else_clause = vec_oprnds3[i];
|
|
|
|
+ if (swap_cond_operands)
|
|
+ std::swap (vec_then_clause, vec_else_clause);
|
|
+
|
|
if (masked)
|
|
vec_compare = vec_cond_lhs;
|
|
else
|
|
@@ -9197,6 +9380,50 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            }
        }
    }
+
+      /* If we decided to apply a loop mask to the result of the vector
+         comparison, AND the comparison with the mask now.  Later passes
+         should then be able to reuse the AND results between multiple
+         vector statements.
+
+         For example:
+           for (int i = 0; i < 100; ++i)
+             x[i] = y[i] ? z[i] : 10;
+
+         results in the following optimized GIMPLE:
+
+         mask__35.8_43 = vect__4.7_41 != { 0, ... };
+         vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
+         _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
+         vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
+         vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
+                                           vect_iftmp.11_47, { 10, ... }>;
+
+         instead of using masked and unmasked forms of
+         vec != { 0, ... } (masked in the MASK_LOAD,
+         unmasked in the VEC_COND_EXPR).  */
|
|
+
|
|
+ if (loop_mask)
|
|
+ {
|
|
+ if (COMPARISON_CLASS_P (vec_compare))
|
|
+ {
|
|
+ tree tmp = make_ssa_name (vec_cmp_type);
|
|
+ tree op0 = TREE_OPERAND (vec_compare, 0);
|
|
+ tree op1 = TREE_OPERAND (vec_compare, 1);
|
|
+ gassign *g = gimple_build_assign (tmp,
|
|
+ TREE_CODE (vec_compare),
|
|
+ op0, op1);
|
|
+ vect_finish_stmt_generation (stmt_info, g, gsi);
|
|
+ vec_compare = tmp;
|
|
+ }
|
|
+
|
|
+ tree tmp2 = make_ssa_name (vec_cmp_type);
|
|
+ gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
|
|
+ vec_compare, loop_mask);
|
|
+ vect_finish_stmt_generation (stmt_info, g, gsi);
|
|
+ vec_compare = tmp2;
|
|
+ }
|
|
+
|
|
if (reduction_type == EXTRACT_LAST_REDUCTION)
|
|
{
|
|
if (!is_gimple_val (vec_compare))
|
|
@@ -9207,6 +9434,15 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
vec_compare = vec_compare_name;
|
|
}
|
|
+ if (must_invert_cmp_result)
|
|
+ {
|
|
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
|
|
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
|
|
+ BIT_NOT_EXPR,
|
|
+ vec_compare);
|
|
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
|
+ vec_compare = vec_compare_name;
|
|
+ }
|
|
gcall *new_stmt = gimple_build_call_internal
|
|
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
|
|
vec_then_clause);
|
|
@@ -9345,7 +9581,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
/* Invariant comparison. */
|
|
if (!vectype)
|
|
{
|
|
- vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
|
|
if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
|
|
return false;
|
|
}
|
|
@@ -9446,12 +9682,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
{
|
|
if (slp_node)
|
|
{
|
|
- auto_vec<tree, 2> ops;
|
|
auto_vec<vec<tree>, 2> vec_defs;
|
|
-
|
|
- ops.safe_push (rhs1);
|
|
- ops.safe_push (rhs2);
|
|
- vect_get_slp_defs (ops, slp_node, &vec_defs);
|
|
+ vect_get_slp_defs (slp_node, &vec_defs);
|
|
vec_oprnds1 = vec_defs.pop ();
|
|
vec_oprnds0 = vec_defs.pop ();
|
|
if (swap_p)
|
|
@@ -9544,7 +9776,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
|
|
static bool
|
|
can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
- slp_tree slp_node, stmt_vec_info *vec_stmt,
|
|
+ slp_tree slp_node, slp_instance slp_node_instance,
|
|
+ stmt_vec_info *vec_stmt,
|
|
stmt_vector_for_cost *cost_vec)
|
|
{
|
|
if (slp_node)
|
|
@@ -9554,13 +9787,15 @@ can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
|
|
{
|
|
if (STMT_VINFO_LIVE_P (slp_stmt_info)
|
|
- && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
|
|
+ && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
|
|
+ slp_node_instance, i,
|
|
vec_stmt, cost_vec))
|
|
return false;
|
|
}
|
|
}
|
|
else if (STMT_VINFO_LIVE_P (stmt_info)
|
|
- && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
|
|
+ && !vectorizable_live_operation (stmt_info, gsi, slp_node,
|
|
+ slp_node_instance, -1,
|
|
vec_stmt, cost_vec))
|
|
return false;
|
|
|
|
@@ -9736,14 +9971,13 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|
|
|| vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
|
|
cost_vec)
|
|
|| vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
|
|
- || vectorizable_reduction (stmt_info, NULL, NULL, node,
|
|
- node_instance, cost_vec)
|
|
+ || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
|
|
|| vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
|
|
|| vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
|
|
- || vectorizable_condition (stmt_info, NULL, NULL, false, node,
|
|
- cost_vec)
|
|
+ || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
|
|
|| vectorizable_comparison (stmt_info, NULL, NULL, node,
|
|
- cost_vec));
|
|
+ cost_vec)
|
|
+ || vectorizable_lc_phi (stmt_info, NULL, node));
|
|
else
|
|
{
|
|
if (bb_vinfo)
|
|
@@ -9759,8 +9993,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|
|
|| vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
|
|
cost_vec)
|
|
|| vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
|
|
- || vectorizable_condition (stmt_info, NULL, NULL, false, node,
|
|
- cost_vec)
|
|
+ || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
|
|
|| vectorizable_comparison (stmt_info, NULL, NULL, node,
|
|
cost_vec));
|
|
}
|
|
@@ -9775,7 +10008,9 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|
|
need extra handling, except for vectorizable reductions. */
|
|
if (!bb_vinfo
|
|
&& STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
|
|
- && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
|
|
+ && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
|
|
+ && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
|
|
+ NULL, cost_vec))
|
|
return opt_result::failure_at (stmt_info->stmt,
|
|
"not vectorized:"
|
|
" live stmt not supported: %G",
|
|
@@ -9864,8 +10099,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
break;
|
|
|
|
case condition_vec_info_type:
|
|
- done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
|
|
- slp_node, NULL);
|
|
+ done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
|
|
gcc_assert (done);
|
|
break;
|
|
|
|
@@ -9887,8 +10121,18 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
break;
|
|
|
|
case reduc_vec_info_type:
|
|
- done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
|
|
- slp_node_instance, NULL);
|
|
+ done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
|
|
+ gcc_assert (done);
|
|
+ break;
|
|
+
|
|
+ case cycle_phi_info_type:
|
|
+ done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
|
|
+ slp_node_instance);
|
|
+ gcc_assert (done);
|
|
+ break;
|
|
+
|
|
+ case lc_phi_info_type:
|
|
+ done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
|
|
gcc_assert (done);
|
|
break;
|
|
|
|
@@ -9944,19 +10188,66 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|
}
|
|
}
|
|
|
|
- /* Handle stmts whose DEF is used outside the loop-nest that is
|
|
- being vectorized. */
|
|
- if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
|
|
- {
|
|
- done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
|
|
- NULL);
|
|
- gcc_assert (done);
|
|
- }
|
|
-
|
|
if (vec_stmt)
|
|
STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
|
|
|
|
- return is_store;
|
|
+ if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
|
|
+ return is_store;
|
|
+
|
|
+ /* If this stmt defines a value used on a backedge, update the
|
|
+ vectorized PHIs. */
|
|
+ stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
|
|
+ stmt_vec_info reduc_info;
|
|
+ if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
|
|
+ && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
|
|
+ && (reduc_info = info_for_reduction (orig_stmt_info))
|
|
+ && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
|
|
+ && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
|
|
+ {
|
|
+ gphi *phi;
|
|
+ if (!slp_node
|
|
+ && (phi = dyn_cast <gphi *>
|
|
+ (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
|
|
+ && dominated_by_p (CDI_DOMINATORS,
|
|
+ gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)))
|
|
+ {
|
|
+ edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
|
|
+ stmt_vec_info phi_info
|
|
+ = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
|
|
+ stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
|
|
+ do
|
|
+ {
|
|
+ add_phi_arg (as_a <gphi *> (phi_info->stmt),
|
|
+ gimple_get_lhs (vec_stmt->stmt), e,
|
|
+ gimple_phi_arg_location (phi, e->dest_idx));
|
|
+ phi_info = STMT_VINFO_RELATED_STMT (phi_info);
|
|
+ vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
|
|
+ }
|
|
+ while (phi_info);
|
|
+ gcc_assert (!vec_stmt);
|
|
+ }
|
|
+ else if (slp_node
|
|
+ && slp_node != slp_node_instance->reduc_phis)
|
|
+ {
|
|
+ slp_tree phi_node = slp_node_instance->reduc_phis;
|
|
+ gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
|
|
+ edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
|
|
+ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
|
|
+ == SLP_TREE_VEC_STMTS (slp_node).length ());
|
|
+ for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
|
|
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
|
|
+ gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
|
|
+ e, gimple_phi_arg_location (phi, e->dest_idx));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Handle stmts whose DEF is used outside the loop-nest that is
|
|
+ being vectorized. */
|
|
+ done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
|
|
+ slp_node_instance, &vec_stmt, NULL);
|
|
+ gcc_assert (done);
|
|
+
|
|
+ return false;
|
|
}
|
|
|
|
|
|
@@ -9979,18 +10270,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info)
|
|
}
|
|
}
|
|
|
|
-/* Function get_vectype_for_scalar_type_and_size.
|
|
+/* If NUNITS is nonzero, return a vector type that contains NUNITS
|
|
+ elements of type SCALAR_TYPE, or null if the target doesn't support
|
|
+ such a type.
|
|
|
|
- Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
|
|
- by the target. */
|
|
+ If NUNITS is zero, return a vector type that contains elements of
|
|
+ type SCALAR_TYPE, choosing whichever vector size the target prefers.
|
|
+
|
|
+ If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
|
|
+ for this vectorization region and want to "autodetect" the best choice.
|
|
+ Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
|
|
+ and we want the new type to be interoperable with it. PREVAILING_MODE
|
|
+ in this case can be a scalar integer mode or a vector mode; when it
|
|
+ is a vector mode, the function acts like a tree-level version of
|
|
+ related_vector_mode. */
|
|
|
|
tree
|
|
-get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
|
|
+get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
|
|
+ tree scalar_type, poly_uint64 nunits)
|
|
{
|
|
tree orig_scalar_type = scalar_type;
|
|
scalar_mode inner_mode;
|
|
machine_mode simd_mode;
|
|
- poly_uint64 nunits;
|
|
tree vectype;
|
|
|
|
if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
|
|
@@ -10030,19 +10331,45 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
|
|
if (scalar_type == NULL_TREE)
|
|
return NULL_TREE;
|
|
|
|
- /* If no size was supplied use the mode the target prefers. Otherwise
|
|
- lookup a vector mode of the specified size. */
|
|
- if (known_eq (size, 0U))
|
|
- simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
|
|
- else if (!multiple_p (size, nbytes, &nunits)
|
|
- || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
|
|
- return NULL_TREE;
|
|
- /* NOTE: nunits == 1 is allowed to support single element vector types. */
|
|
- if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
|
|
- return NULL_TREE;
|
|
+ /* If no prevailing mode was supplied, use the mode the target prefers.
|
|
+ Otherwise lookup a vector mode based on the prevailing mode. */
|
|
+ if (prevailing_mode == VOIDmode)
|
|
+ {
|
|
+ gcc_assert (known_eq (nunits, 0U));
|
|
+ simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
|
|
+ if (SCALAR_INT_MODE_P (simd_mode))
|
|
+ {
|
|
+ /* Traditional behavior is not to take the integer mode
|
|
+ literally, but simply to use it as a way of determining
|
|
+ the vector size. It is up to mode_for_vector to decide
|
|
+ what the TYPE_MODE should be.
|
|
+
|
|
+ Note that nunits == 1 is allowed in order to support single
|
|
+ element vector types. */
|
|
+ if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
|
|
+ || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
|
|
+ return NULL_TREE;
|
|
+ }
|
|
+ }
|
|
+ else if (SCALAR_INT_MODE_P (prevailing_mode)
|
|
+ || !related_vector_mode (prevailing_mode,
|
|
+ inner_mode, nunits).exists (&simd_mode))
|
|
+ {
|
|
+ /* Fall back to using mode_for_vector, mostly in the hope of being
|
|
+ able to use an integer mode. */
|
|
+ if (known_eq (nunits, 0U)
|
|
+ && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
|
|
+ return NULL_TREE;
|
|
|
|
- vectype = build_vector_type (scalar_type, nunits);
|
|
+ if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
|
|
+ return NULL_TREE;
|
|
+ }
|
|
+
|
|
+ vectype = build_vector_type_for_mode (scalar_type, simd_mode);
|
|
|
|
+ /* In cases where the mode was chosen by mode_for_vector, check that
|
|
+ the target actually supports the chosen mode, or that it at least
|
|
+ allows the vector mode to be replaced by a like-sized integer. */
|
|
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|
|
&& !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
|
|
return NULL_TREE;
|
|
@@ -10056,22 +10383,22 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
|
|
return vectype;
|
|
}
|
|
|
|
-poly_uint64 current_vector_size;
|
|
-
|
|
/* Function get_vectype_for_scalar_type.
|
|
|
|
Returns the vector type corresponding to SCALAR_TYPE as supported
|
|
by the target. */
|
|
|
|
tree
|
|
-get_vectype_for_scalar_type (tree scalar_type)
|
|
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
|
|
{
|
|
- tree vectype;
|
|
- vectype = get_vectype_for_scalar_type_and_size (scalar_type,
|
|
- current_vector_size);
|
|
- if (vectype
|
|
- && known_eq (current_vector_size, 0U))
|
|
- current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
|
|
+ tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
|
|
+ scalar_type);
|
|
+ if (vectype && vinfo->vector_mode == VOIDmode)
|
|
+ vinfo->vector_mode = TYPE_MODE (vectype);
|
|
+
|
|
+ if (vectype)
|
|
+ vinfo->used_vector_modes.add (TYPE_MODE (vectype));
|
|
+
|
|
return vectype;
|
|
}
|
|
|
|
@@ -10081,15 +10408,14 @@ get_vectype_for_scalar_type (tree scalar_type)
|
|
of vectors of specified SCALAR_TYPE as supported by target. */
|
|
|
|
tree
|
|
-get_mask_type_for_scalar_type (tree scalar_type)
|
|
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
|
|
{
|
|
- tree vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
|
|
if (!vectype)
|
|
return NULL;
|
|
|
|
- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
|
|
- current_vector_size);
|
|
+ return truth_type_for (vectype);
|
|
}
|
|
|
|
/* Function get_same_sized_vectype
|
|
@@ -10101,10 +10427,29 @@ tree
|
|
get_same_sized_vectype (tree scalar_type, tree vector_type)
|
|
{
|
|
if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
|
|
- return build_same_sized_truth_vector_type (vector_type);
|
|
+ return truth_type_for (vector_type);
|
|
+
|
|
+ poly_uint64 nunits;
|
|
+ if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
|
|
+ GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
|
|
+ return NULL_TREE;
|
|
+
|
|
+ return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
|
|
+ scalar_type, nunits);
|
|
+}
|
|
+
|
|
+/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
|
|
+ would not change the chosen vector modes. */
|
|
|
|
- return get_vectype_for_scalar_type_and_size
|
|
- (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
|
|
+bool
|
|
+vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
|
|
+{
|
|
+ for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
|
|
+ i != vinfo->used_vector_modes.end (); ++i)
|
|
+ if (!VECTOR_MODE_P (*i)
|
|
+ || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
|
|
+ return false;
|
|
+ return true;
|
|
}
|
|
|
|
/* Function vect_is_simple_use.
|
|
@@ -10492,11 +10837,8 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
|
|
{
|
|
intermediate_mode = insn_data[icode1].operand[0].mode;
|
|
if (VECTOR_BOOLEAN_TYPE_P (prev_type))
|
|
- {
|
|
- intermediate_type = vect_halve_mask_nunits (prev_type);
|
|
- if (intermediate_mode != TYPE_MODE (intermediate_type))
|
|
- return false;
|
|
- }
|
|
+ intermediate_type
|
|
+ = vect_halve_mask_nunits (prev_type, intermediate_mode);
|
|
else
|
|
intermediate_type
|
|
= lang_hooks.types.type_for_mode (intermediate_mode,
|
|
@@ -10680,11 +11022,8 @@ supportable_narrowing_operation (enum tree_code code,
|
|
{
|
|
intermediate_mode = insn_data[icode1].operand[0].mode;
|
|
if (VECTOR_BOOLEAN_TYPE_P (prev_type))
|
|
- {
|
|
- intermediate_type = vect_double_mask_nunits (prev_type);
|
|
- if (intermediate_mode != TYPE_MODE (intermediate_type))
|
|
- return false;
|
|
- }
|
|
+ intermediate_type
|
|
+ = vect_double_mask_nunits (prev_type, intermediate_mode);
|
|
else
|
|
intermediate_type
|
|
= lang_hooks.types.type_for_mode (intermediate_mode, uns);
|
|
@@ -10777,6 +11116,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
|
tree *stmt_vectype_out,
|
|
tree *nunits_vectype_out)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple *stmt = stmt_info->stmt;
|
|
|
|
*stmt_vectype_out = NULL_TREE;
|
|
@@ -10810,7 +11150,12 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
|
tree vectype;
|
|
tree scalar_type = NULL_TREE;
|
|
if (STMT_VINFO_VECTYPE (stmt_info))
|
|
- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ {
|
|
+ *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "precomputed vectype: %T\n", vectype);
|
|
+ }
|
|
else
|
|
{
|
|
gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
|
|
@@ -10842,8 +11187,8 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
|
|
|
if (dump_enabled_p ())
|
|
dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "get vectype for scalar type: %T\n", scalar_type);
|
|
- vectype = get_vectype_for_scalar_type (scalar_type);
|
|
+ "get vectype for scalar type: %T\n", scalar_type);
|
|
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
if (!vectype)
|
|
return opt_result::failure_at (stmt,
|
|
"not vectorized:"
|
|
@@ -10859,42 +11204,38 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
|
|
|
/* Don't try to compute scalar types if the stmt produces a boolean
|
|
vector; use the existing vector type instead. */
|
|
- tree nunits_vectype;
|
|
- if (VECTOR_BOOLEAN_TYPE_P (vectype))
|
|
- nunits_vectype = vectype;
|
|
- else
|
|
+ tree nunits_vectype = vectype;
|
|
+ if (!VECTOR_BOOLEAN_TYPE_P (vectype)
|
|
+ && *stmt_vectype_out != boolean_type_node)
|
|
{
|
|
/* The number of units is set according to the smallest scalar
|
|
type (or the largest vector size, but we only support one
|
|
vector size per vectorization). */
|
|
- if (*stmt_vectype_out != boolean_type_node)
|
|
+ HOST_WIDE_INT dummy;
|
|
+ scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
|
|
+ if (scalar_type != TREE_TYPE (vectype))
|
|
{
|
|
- HOST_WIDE_INT dummy;
|
|
- scalar_type = vect_get_smallest_scalar_type (stmt_info,
|
|
- &dummy, &dummy);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
|
+ "get vectype for smallest scalar type: %T\n",
|
|
+ scalar_type);
|
|
+ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
|
+ if (!nunits_vectype)
|
|
+ return opt_result::failure_at
|
|
+ (stmt, "not vectorized: unsupported data-type %T\n",
|
|
+ scalar_type);
|
|
+ if (dump_enabled_p ())
|
|
+ dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
|
|
+ nunits_vectype);
|
|
}
|
|
- if (dump_enabled_p ())
|
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
|
- "get vectype for scalar type: %T\n", scalar_type);
|
|
- nunits_vectype = get_vectype_for_scalar_type (scalar_type);
|
|
}
|
|
- if (!nunits_vectype)
|
|
- return opt_result::failure_at (stmt,
|
|
- "not vectorized: unsupported data-type %T\n",
|
|
- scalar_type);
|
|
|
|
- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
|
|
- GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
|
|
- return opt_result::failure_at (stmt,
|
|
- "not vectorized: different sized vector "
|
|
- "types in statement, %T and %T\n",
|
|
- vectype, nunits_vectype);
|
|
+ gcc_assert (*stmt_vectype_out == boolean_type_node
|
|
+ || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
|
|
+ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
|
|
|
|
if (dump_enabled_p ())
|
|
{
|
|
- dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
|
|
- nunits_vectype);
|
|
-
|
|
dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
|
|
dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
|
|
dump_printf (MSG_NOTE, "\n");
|
|
@@ -10911,6 +11252,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
|
opt_tree
|
|
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
|
|
{
|
|
+ vec_info *vinfo = stmt_info->vinfo;
|
|
gimple *stmt = stmt_info->stmt;
|
|
tree mask_type = NULL;
|
|
tree vectype, scalar_type;
|
|
@@ -10920,7 +11262,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
|
|
&& !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
|
|
{
|
|
scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
|
- mask_type = get_mask_type_for_scalar_type (scalar_type);
|
|
+ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
|
|
|
|
if (!mask_type)
|
|
return opt_tree::failure_at (stmt,
|
|
@@ -10968,7 +11310,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
|
|
&& !VECTOR_BOOLEAN_TYPE_P (mask_type)
|
|
&& gimple_code (stmt) == GIMPLE_ASSIGN
|
|
&& TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
|
|
- mask_type = build_same_sized_truth_vector_type (mask_type);
|
|
+ mask_type = truth_type_for (mask_type);
|
|
}
|
|
|
|
/* No mask_type should mean loop invariant predicate.
|
|
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index d89ec3b7c76..c2c6377d3f9 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -639,8 +639,11 @@ vec_info::new_stmt_vec_info (gimple *stmt)
  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_VECTORIZABLE (res) = true;
-  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
-  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
+  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
+  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
+  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
+  STMT_VINFO_REDUC_IDX (res) = -1;
+  STMT_VINFO_SLP_VECT_ONLY (res) = false;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
@@ -862,8 +865,7 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)

static unsigned
try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
-                      unsigned *num_vectorized_loops,
-                      loop_p loop, loop_vec_info orig_loop_vinfo,
+                      unsigned *num_vectorized_loops, loop_p loop,
                      gimple *loop_vectorized_call,
                      gimple *loop_dist_alias_call)
{
@@ -871,6 +873,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
  vec_info_shared shared;
  auto_purge_vect_location sentinel;
  vect_location = find_loop_location (loop);
+
  if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
@@ -878,10 +881,17 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                 LOCATION_FILE (vect_location.get_location_t ()),
                 LOCATION_LINE (vect_location.get_location_t ()));

-  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
-  opt_loop_vec_info loop_vinfo
-    = vect_analyze_loop (loop, orig_loop_vinfo, &shared);
-  loop->aux = loop_vinfo;
+  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
+  /* In the case of epilogue vectorization the loop already has its
+     loop_vec_info set, we do not require to analyze the loop in this case.  */
+  if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
+    loop_vinfo = opt_loop_vec_info::success (vinfo);
+  else
+    {
+      /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
+      loop_vinfo = vect_analyze_loop (loop, &shared);
+      loop->aux = loop_vinfo;
+    }

  if (!loop_vinfo)
    if (dump_enabled_p ())
@@ -968,7 +978,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
      unsigned HOST_WIDE_INT bytes;
      if (dump_enabled_p ())
        {
-          if (current_vector_size.is_constant (&bytes))
+          if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
            dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                             "loop vectorized using %wu byte vectors\n", bytes);
          else
@@ -1009,8 +1019,13 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,

  /* Epilogue of vectorized loop must be vectorized too.  */
  if (new_loop)
-    ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
-                                 new_loop, loop_vinfo, NULL, NULL);
+    {
+      /* Don't include vectorized epilogues in the "vectorized loops" count.
+         */
+      unsigned dont_count = *num_vectorized_loops;
+      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
+                                   new_loop, NULL, NULL);
+    }

  return ret;
}
@@ -1026,8 +1041,7 @@ try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
        || loop->force_vectorize))
    return 0;

-  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
-                               loop, NULL,
+  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
                               vect_loop_vectorized_call (loop),
                               vect_loop_dist_alias_call (loop));
}
@@ -1344,7 +1358,8 @@ get_vec_alignment_for_array_type (tree type)
  gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
  poly_uint64 array_size, vector_size;

-  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
+  tree scalar_type = strip_array_types (type);
+  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
  if (!vectype
      || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
      || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
@@ -1512,3 +1527,36 @@ make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
  return new pass_ipa_increase_alignment (ctxt);
}
+
+/* If the condition represented by T is a comparison or the SSA name
+   result of a comparison, extract the comparison's operands.  Represent
+   T as NE_EXPR <T, 0> otherwise.  */
+
+void
+scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
+{
+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
+    {
+      this->code = TREE_CODE (t);
+      this->op0 = TREE_OPERAND (t, 0);
+      this->op1 = TREE_OPERAND (t, 1);
+      return;
+    }
+
+  if (TREE_CODE (t) == SSA_NAME)
+    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
+      {
+        tree_code code = gimple_assign_rhs_code (stmt);
+        if (TREE_CODE_CLASS (code) == tcc_comparison)
+          {
+            this->code = code;
+            this->op0 = gimple_assign_rhs1 (stmt);
+            this->op1 = gimple_assign_rhs2 (stmt);
+            return;
+          }
+      }
+
+  this->code = NE_EXPR;
+  this->op0 = t;
+  this->op1 = build_zero_cst (TREE_TYPE (t));
+}
|
|
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 148b9a7f215..c46e2742c36 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -26,6 +26,7 @@ typedef struct _stmt_vec_info *stmt_vec_info;
#include "tree-data-ref.h"
#include "tree-hash-traits.h"
#include "target.h"
+#include <utility>

/* Used for naming of new temporaries.  */
enum vect_var_kind {
@@ -120,6 +121,8 @@ struct _slp_tree {
  vec<slp_tree> children;
  /* A group of scalar stmts to be vectorized together.  */
  vec<stmt_vec_info> stmts;
+  /* A group of scalar operands to be vectorized together.  */
+  vec<tree> ops;
  /* Load permutation relative to the stores, NULL if there is no
     permutation.  */
  vec<unsigned> load_permutation;
|
|
@@ -170,13 +173,82 @@ typedef struct _slp_instance {
|
|
|
|
#define SLP_TREE_CHILDREN(S) (S)->children
|
|
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
|
|
+#define SLP_TREE_SCALAR_OPS(S) (S)->ops
|
|
#define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
|
|
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
|
|
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
|
|
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
|
|
#define SLP_TREE_DEF_TYPE(S) (S)->def_type
|
|
|
|
+/* Key for map that records association between
|
|
+ scalar conditions and corresponding loop mask, and
|
|
+ is populated by vect_record_loop_mask. */
|
|
|
|
+struct scalar_cond_masked_key
|
|
+{
|
|
+ scalar_cond_masked_key (tree t, unsigned ncopies_)
|
|
+ : ncopies (ncopies_)
|
|
+ {
|
|
+ get_cond_ops_from_tree (t);
|
|
+ }
|
|
+
|
|
+ void get_cond_ops_from_tree (tree);
|
|
+
|
|
+ unsigned ncopies;
|
|
+ tree_code code;
|
|
+ tree op0;
|
|
+ tree op1;
|
|
+};
|
|
+
|
|
+template<>
|
|
+struct default_hash_traits<scalar_cond_masked_key>
|
|
+{
|
|
+ typedef scalar_cond_masked_key compare_type;
|
|
+ typedef scalar_cond_masked_key value_type;
|
|
+
|
|
+ static inline hashval_t
|
|
+ hash (value_type v)
|
|
+ {
|
|
+ inchash::hash h;
|
|
+ h.add_int (v.code);
|
|
+ inchash::add_expr (v.op0, h, 0);
|
|
+ inchash::add_expr (v.op1, h, 0);
|
|
+ h.add_int (v.ncopies);
|
|
+ return h.end ();
|
|
+ }
|
|
+
|
|
+ static inline bool
|
|
+ equal (value_type existing, value_type candidate)
|
|
+ {
|
|
+ return (existing.ncopies == candidate.ncopies
|
|
+ && existing.code == candidate.code
|
|
+ && operand_equal_p (existing.op0, candidate.op0, 0)
|
|
+ && operand_equal_p (existing.op1, candidate.op1, 0));
|
|
+ }
|
|
+
|
|
+ static inline void
|
|
+ mark_empty (value_type &v)
|
|
+ {
|
|
+ v.ncopies = 0;
|
|
+ }
|
|
+
|
|
+ static inline bool
|
|
+ is_empty (value_type v)
|
|
+ {
|
|
+ return v.ncopies == 0;
|
|
+ }
|
|
+
|
|
+ static inline void mark_deleted (value_type &) {}
|
|
+
|
|
+ static inline bool is_deleted (const value_type &)
|
|
+ {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ static inline void remove (value_type &) {}
|
|
+};
|
|
+
|
|
+typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
|
|
|
|
/* Describes two objects whose addresses must be unequal for the vectorized
|
|
loop to be valid. */
|
|
@@ -217,6 +289,7 @@ struct vec_info_shared {
|
|
|
|
/* Vectorizer state common between loop and basic-block vectorization. */
|
|
struct vec_info {
|
|
+ typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
|
|
enum vec_kind { bb, loop };
|
|
|
|
vec_info (vec_kind, void *, vec_info_shared *);
|
|
@@ -254,6 +327,14 @@ struct vec_info {
|
|
/* Cost data used by the target cost model. */
|
|
void *target_cost_data;
|
|
|
|
+ /* The set of vector modes used in the vectorized region. */
|
|
+ mode_set used_vector_modes;
|
|
+
|
|
+ /* The argument we should pass to related_vector_mode when looking up
|
|
+ the vector mode for a scalar mode, or VOIDmode if we haven't yet
|
|
+ made any decisions about which vector modes to use. */
|
|
+ machine_mode vector_mode;
|
|
+
|
|
private:
|
|
stmt_vec_info new_stmt_vec_info (gimple *stmt);
|
|
void set_vinfo_for_stmt (gimple *, stmt_vec_info);
|
|
@@ -377,6 +458,8 @@ struct rgroup_masks {
|
|
|
|
typedef auto_vec<rgroup_masks> vec_loop_masks;
|
|
|
|
+typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
|
|
+
|
|
/*-----------------------------------------------------------------*/
|
|
/* Info on vectorized loops. */
|
|
/*-----------------------------------------------------------------*/
|
|
@@ -399,7 +482,7 @@ typedef struct _loop_vec_info : public vec_info {
|
|
/* Condition under which this loop is analyzed and versioned. */
|
|
tree num_iters_assumptions;
|
|
|
|
- /* Threshold of number of iterations below which vectorzation will not be
|
|
+ /* Threshold of number of iterations below which vectorization will not be
|
|
performed. It is calculated from MIN_PROFITABLE_ITERS and
|
|
PARAM_MIN_VECT_LOOP_BOUND. */
|
|
unsigned int th;
|
|
@@ -421,6 +504,9 @@ typedef struct _loop_vec_info : public vec_info {
|
|
on inactive scalars. */
|
|
vec_loop_masks masks;
|
|
|
|
+ /* Set of scalar conditions that have loop mask applied. */
|
|
+ scalar_cond_masked_set_type scalar_cond_masked_set;
|
|
+
|
|
/* If we are using a loop mask to align memory addresses, this variable
|
|
contains the number of vector elements that we should skip in the
|
|
first iteration of the vector loop (i.e. the number of leading
|
|
@@ -497,6 +583,13 @@ typedef struct _loop_vec_info : public vec_info {
|
|
/* Cost of a single scalar iteration. */
|
|
int single_scalar_iteration_cost;
|
|
|
|
+ /* The cost of the vector prologue and epilogue, including peeled
|
|
+ iterations and set-up code. */
|
|
+ int vec_outside_cost;
|
|
+
|
|
+ /* The cost of the vector loop body. */
|
|
+ int vec_inside_cost;
|
|
+
|
|
/* Is the loop vectorizable? */
|
|
bool vectorizable;
|
|
|
|
@@ -551,6 +644,10 @@ typedef struct _loop_vec_info : public vec_info {
|
|
this points to the original vectorized loop. Otherwise NULL. */
|
|
_loop_vec_info *orig_loop_info;
|
|
|
|
+ /* Used to store loop_vec_infos of epilogues of this loop during
|
|
+ analysis. */
|
|
+ vec<_loop_vec_info *> epilogue_vinfos;
|
|
+
|
|
} *loop_vec_info;
|
|
|
|
/* Access Functions. */
|
|
@@ -682,6 +779,8 @@ enum stmt_vec_info_type {
|
|
type_promotion_vec_info_type,
|
|
type_demotion_vec_info_type,
|
|
type_conversion_vec_info_type,
|
|
+ cycle_phi_info_type,
|
|
+ lc_phi_info_type,
|
|
loop_exit_ctrl_vec_info_type
|
|
};
|
|
|
|
@@ -917,21 +1016,42 @@ struct _stmt_vec_info {
|
|
for loop vectorization. */
|
|
vect_memory_access_type memory_access_type;
|
|
|
|
- /* For reduction loops, this is the type of reduction. */
|
|
- enum vect_reduction_type v_reduc_type;
|
|
+ /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */
|
|
+ tree induc_cond_initial_val;
|
|
|
|
- /* For CONST_COND_REDUCTION, record the reduc code. */
|
|
- enum tree_code const_cond_reduc_code;
|
|
+ /* If not NULL the value to be added to compute final reduction value. */
|
|
+ tree reduc_epilogue_adjustment;
|
|
|
|
/* On a reduction PHI the reduction type as detected by
|
|
- vect_force_simple_reduction. */
|
|
+ vect_is_simple_reduction and vectorizable_reduction. */
|
|
enum vect_reduction_type reduc_type;
|
|
|
|
+ /* The original reduction code, to be used in the epilogue. */
|
|
+ enum tree_code reduc_code;
|
|
+ /* An internal function we should use in the epilogue. */
|
|
+ internal_fn reduc_fn;
|
|
+
|
|
+ /* On a stmt participating in the reduction the index of the operand
|
|
+ on the reduction SSA cycle. */
|
|
+ int reduc_idx;
|
|
+
|
|
/* On a reduction PHI the def returned by vect_force_simple_reduction.
|
|
On the def returned by vect_force_simple_reduction the
|
|
corresponding PHI. */
|
|
stmt_vec_info reduc_def;
|
|
|
|
+ /* The vector input type relevant for reduction vectorization. */
|
|
+ tree reduc_vectype_in;
|
|
+
|
|
+ /* The vector type for performing the actual reduction. */
|
|
+ tree reduc_vectype;
|
|
+
|
|
+ /* Whether we force a single cycle PHI during reduction vectorization. */
|
|
+ bool force_single_cycle;
|
|
+
|
|
+ /* Whether on this stmt reduction meta is recorded. */
|
|
+ bool is_reduc_info;
|
|
+
|
|
/* The number of scalar stmt references from active SLP instances. */
|
|
unsigned int num_slp_uses;
|
|
|
|
@@ -949,6 +1069,9 @@ struct _stmt_vec_info {
|
|
and OPERATION_BITS without changing the result. */
|
|
unsigned int operation_precision;
|
|
signop operation_sign;
|
|
+
|
|
+ /* True if this is only suitable for SLP vectorization. */
|
|
+ bool slp_vect_only_p;
|
|
};
|
|
|
|
/* Information about a gather/scatter call. */
|
|
@@ -1011,8 +1134,10 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
|
|
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p
|
|
#define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
|
|
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
|
|
-#define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type
|
|
-#define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code
|
|
+#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
|
|
+#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
|
|
+#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx
|
|
+#define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle
|
|
|
|
#define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop
|
|
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address
|
|
@@ -1043,7 +1168,12 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
|
|
#define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
|
|
#define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses
|
|
#define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
|
|
+#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code
|
|
+#define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn
|
|
#define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
|
|
+#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype
|
|
+#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in
|
|
+#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p
|
|
|
|
#define DR_GROUP_FIRST_ELEMENT(S) \
|
|
(gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
|
|
@@ -1358,7 +1488,7 @@ vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
|
|
static inline void
|
|
vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
|
|
{
|
|
- /* All unit counts have the form current_vector_size * X for some
|
|
+ /* All unit counts have the form vec_info::vector_size * X for some
|
|
rational X, so two unit sizes must have a common multiple.
|
|
Everything is a multiple of the initial value of 1. */
|
|
*max_nunits = force_common_multiple (*max_nunits, nunits);
|
|
@@ -1466,20 +1596,22 @@ extern void vect_set_loop_condition (struct loop *, loop_vec_info,
|
|
extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
|
|
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
|
|
struct loop *, edge);
|
|
-struct loop *vect_loop_versioning (loop_vec_info, unsigned int, bool,
|
|
- poly_uint64);
|
|
+struct loop *vect_loop_versioning (loop_vec_info);
|
|
extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
|
|
- tree *, tree *, tree *, int, bool, bool);
|
|
+ tree *, tree *, tree *, int, bool, bool,
|
|
+ tree *, drs_init_vec &);
|
|
extern void vect_prepare_for_masked_peels (loop_vec_info);
|
|
extern dump_user_location_t find_loop_location (struct loop *);
|
|
extern bool vect_can_advance_ivs_p (loop_vec_info);
|
|
+extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
|
|
|
|
/* In tree-vect-stmts.c. */
|
|
-extern poly_uint64 current_vector_size;
|
|
-extern tree get_vectype_for_scalar_type (tree);
|
|
-extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64);
|
|
-extern tree get_mask_type_for_scalar_type (tree);
|
|
+extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
|
|
+ poly_uint64 = 0);
|
|
+extern tree get_vectype_for_scalar_type (vec_info *, tree);
|
|
+extern tree get_mask_type_for_scalar_type (vec_info *, tree);
|
|
extern tree get_same_sized_vectype (tree, tree);
|
|
+extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
|
|
extern bool vect_get_loop_mask_type (loop_vec_info);
|
|
extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
|
|
stmt_vec_info * = NULL, gimple ** = NULL);
|
|
@@ -1491,15 +1623,15 @@ extern bool supportable_widening_operation (enum tree_code, stmt_vec_info,
|
|
enum tree_code *, int *,
|
|
vec<tree> *);
|
|
extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
|
|
- enum tree_code *,
|
|
- int *, vec<tree> *);
|
|
+ enum tree_code *, int *,
|
|
+ vec<tree> *);
|
|
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
|
|
enum vect_cost_for_stmt, stmt_vec_info,
|
|
int, enum vect_cost_model_location);
|
|
extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *);
|
|
extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *,
|
|
gimple_stmt_iterator *);
|
|
-extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info);
|
|
+extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
|
|
extern tree vect_get_store_rhs (stmt_vec_info);
|
|
extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type);
|
|
extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL);
|
|
@@ -1515,19 +1647,13 @@ extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *,
|
|
extern void vect_remove_stores (stmt_vec_info);
|
|
extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
|
|
slp_instance, stmt_vector_for_cost *);
|
|
-extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
|
|
- stmt_vec_info *, bool, slp_tree,
|
|
- stmt_vector_for_cost *);
|
|
-extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
|
|
- stmt_vec_info *, slp_tree,
|
|
- stmt_vector_for_cost *);
|
|
extern void vect_get_load_cost (stmt_vec_info, int, bool,
|
|
unsigned int *, unsigned int *,
|
|
stmt_vector_for_cost *,
|
|
stmt_vector_for_cost *, bool);
|
|
extern void vect_get_store_cost (stmt_vec_info, int,
|
|
unsigned int *, stmt_vector_for_cost *);
|
|
-extern bool vect_supportable_shift (enum tree_code, tree);
|
|
+extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
|
|
extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
|
|
extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
|
|
extern void optimize_mask_stores (struct loop*);
|
|
@@ -1557,7 +1683,7 @@ extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
|
|
gather_scatter_info *);
|
|
extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
|
|
vec<data_reference_p> *);
|
|
-extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *);
|
|
+extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
|
|
extern void vect_record_base_alignments (vec_info *);
|
|
extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
|
|
tree *, gimple_stmt_iterator *,
|
|
@@ -1586,40 +1712,43 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *,
|
|
tree, tree = NULL_TREE);
|
|
|
|
/* In tree-vect-loop.c. */
|
|
-/* FORNOW: Used in tree-parloops.c. */
|
|
-extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info,
|
|
- bool *, bool);
|
|
-/* Used in gimple-loop-interchange.c. */
|
|
+/* Used in tree-vect-loop-manip.c */
|
|
+extern void determine_peel_for_niter (loop_vec_info);
|
|
+/* Used in gimple-loop-interchange.c and tree-parloops.c. */
|
|
extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
|
|
enum tree_code);
|
|
+extern bool needs_fold_left_reduction_p (tree, tree_code);
|
|
/* Drive for loop analysis stage. */
|
|
-extern opt_loop_vec_info vect_analyze_loop (struct loop *,
|
|
- loop_vec_info,
|
|
- vec_info_shared *);
|
|
+extern opt_loop_vec_info vect_analyze_loop (struct loop *, vec_info_shared *);
|
|
extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
|
|
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
|
|
tree *, bool);
|
|
-extern tree vect_halve_mask_nunits (tree);
|
|
-extern tree vect_double_mask_nunits (tree);
|
|
+extern tree vect_halve_mask_nunits (tree, machine_mode);
|
|
+extern tree vect_double_mask_nunits (tree, machine_mode);
|
|
extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
|
|
- unsigned int, tree);
|
|
+ unsigned int, tree, tree);
|
|
extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
|
|
unsigned int, tree, unsigned int);
|
|
+extern stmt_vec_info info_for_reduction (stmt_vec_info);
|
|
|
|
/* Drive for loop transformation stage. */
|
|
extern struct loop *vect_transform_loop (loop_vec_info);
|
|
extern opt_loop_vec_info vect_analyze_loop_form (struct loop *,
|
|
vec_info_shared *);
|
|
extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *,
|
|
- slp_tree, int, stmt_vec_info *,
|
|
+ slp_tree, slp_instance, int,
|
|
+ stmt_vec_info *,
|
|
stmt_vector_for_cost *);
|
|
-extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
|
|
- stmt_vec_info *, slp_tree, slp_instance,
|
|
+extern bool vectorizable_reduction (stmt_vec_info, slp_tree, slp_instance,
|
|
stmt_vector_for_cost *);
|
|
extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
|
|
stmt_vec_info *, slp_tree,
|
|
stmt_vector_for_cost *);
|
|
-extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *);
|
|
+extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *,
|
|
+ stmt_vec_info *, slp_tree);
|
|
+extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *,
|
|
+ slp_tree, slp_instance);
|
|
+extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
|
|
extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
|
|
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
|
|
stmt_vector_for_cost *,
|
|
@@ -1637,15 +1766,16 @@ extern void vect_schedule_slp (vec_info *);
|
|
extern opt_result vect_analyze_slp (vec_info *, unsigned);
|
|
extern bool vect_make_slp_decision (loop_vec_info);
|
|
extern void vect_detect_hybrid_slp (loop_vec_info);
|
|
-extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *);
|
|
+extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U);
|
|
extern bool vect_slp_bb (basic_block);
|
|
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
|
|
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
|
|
-extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode,
|
|
+extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
|
|
+ machine_mode,
|
|
unsigned int * = NULL,
|
|
tree * = NULL, tree * = NULL);
|
|
-extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>,
|
|
- unsigned int, vec<tree> &);
|
|
+extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
|
|
+ vec<tree>, unsigned int, vec<tree> &);
|
|
extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
|
|
|
|
/* In tree-vect-patterns.c. */
|
|
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
|
|
index 2140101d7d2..fbcd8aa6367 100644
|
|
--- a/gcc/tree-vrp.c
|
|
+++ b/gcc/tree-vrp.c
|
|
@@ -69,23 +69,20 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "builtins.h"
|
|
#include "wide-int-range.h"
|
|
|
|
+static bool
|
|
+ranges_from_anti_range (const value_range_base *ar,
|
|
+ value_range_base *vr0, value_range_base *vr1,
|
|
+ bool handle_pointers = false);
|
|
+
|
|
/* Set of SSA names found live during the RPO traversal of the function
|
|
for still active basic-blocks. */
|
|
static sbitmap *live;
|
|
|
|
-void
|
|
-value_range_base::set (enum value_range_kind kind, tree min, tree max)
|
|
-{
|
|
- m_kind = kind;
|
|
- m_min = min;
|
|
- m_max = max;
|
|
- if (flag_checking)
|
|
- check ();
|
|
-}
|
|
-
|
|
void
|
|
value_range::set_equiv (bitmap equiv)
|
|
{
|
|
+ if (undefined_p () || varying_p ())
|
|
+ equiv = NULL;
|
|
/* Since updating the equivalence set involves deep copying the
|
|
bitmaps, only do it if absolutely necessary.
|
|
|
|
@@ -261,7 +258,8 @@ value_range_base::constant_p () const
|
|
void
|
|
value_range_base::set_undefined ()
|
|
{
|
|
- set (VR_UNDEFINED, NULL, NULL);
|
|
+ m_kind = VR_UNDEFINED;
|
|
+ m_min = m_max = NULL;
|
|
}
|
|
|
|
void
|
|
@@ -273,7 +271,8 @@ value_range::set_undefined ()
|
|
void
|
|
value_range_base::set_varying ()
|
|
{
|
|
- set (VR_VARYING, NULL, NULL);
|
|
+ m_kind = VR_VARYING;
|
|
+ m_min = m_max = NULL;
|
|
}
|
|
|
|
void
|
|
@@ -335,6 +334,24 @@ value_range::equiv_add (const_tree var,
|
|
bool
|
|
value_range_base::singleton_p (tree *result) const
|
|
{
|
|
+ if (m_kind == VR_ANTI_RANGE)
|
|
+ {
|
|
+ if (nonzero_p ())
|
|
+ {
|
|
+ if (TYPE_PRECISION (type ()) == 1)
|
|
+ {
|
|
+ if (result)
|
|
+ *result = m_max;
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ value_range_base vr0, vr1;
|
|
+ return (ranges_from_anti_range (this, &vr0, &vr1, true)
|
|
+ && vr1.undefined_p ()
|
|
+ && vr0.singleton_p (result));
|
|
+ }
|
|
if (m_kind == VR_RANGE
|
|
&& vrp_operand_equal_p (min (), max ())
|
|
&& is_gimple_min_invariant (min ()))
|
|
@@ -510,23 +527,28 @@ static assert_locus **asserts_for;
|
|
/* Return the maximum value for TYPE. */
|
|
|
|
tree
|
|
-vrp_val_max (const_tree type)
|
|
+vrp_val_max (const_tree type, bool handle_pointers)
|
|
{
|
|
- if (!INTEGRAL_TYPE_P (type))
|
|
- return NULL_TREE;
|
|
-
|
|
- return TYPE_MAX_VALUE (type);
|
|
+ if (INTEGRAL_TYPE_P (type))
|
|
+ return TYPE_MAX_VALUE (type);
|
|
+ if (POINTER_TYPE_P (type) && handle_pointers)
|
|
+ {
|
|
+ wide_int max = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
|
|
+ return wide_int_to_tree (const_cast<tree> (type), max);
|
|
+ }
|
|
+ return NULL_TREE;
|
|
}
|
|
|
|
/* Return the minimum value for TYPE. */
|
|
|
|
tree
|
|
-vrp_val_min (const_tree type)
|
|
+vrp_val_min (const_tree type, bool handle_pointers)
|
|
{
|
|
- if (!INTEGRAL_TYPE_P (type))
|
|
- return NULL_TREE;
|
|
-
|
|
- return TYPE_MIN_VALUE (type);
|
|
+ if (INTEGRAL_TYPE_P (type))
|
|
+ return TYPE_MIN_VALUE (type);
|
|
+ if (POINTER_TYPE_P (type) && handle_pointers)
|
|
+ return build_zero_cst (const_cast<tree> (type));
|
|
+ return NULL_TREE;
|
|
}
|
|
|
|
/* Return whether VAL is equal to the maximum value of its type.
|
|
@@ -637,8 +659,7 @@ intersect_range_with_nonzero_bits (enum value_range_kind vr_type,
|
|
extract ranges from var + CST op limit. */
|
|
|
|
void
|
|
-value_range_base::set_and_canonicalize (enum value_range_kind kind,
|
|
- tree min, tree max)
|
|
+value_range_base::set (enum value_range_kind kind, tree min, tree max)
|
|
{
|
|
/* Use the canonical setters for VR_UNDEFINED and VR_VARYING. */
|
|
if (kind == VR_UNDEFINED)
|
|
@@ -652,11 +673,31 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
|
|
return;
|
|
}
|
|
|
|
+ /* Convert POLY_INT_CST bounds into worst-case INTEGER_CST bounds. */
|
|
+ if (POLY_INT_CST_P (min))
|
|
+ {
|
|
+ tree type_min = vrp_val_min (TREE_TYPE (min), true);
|
|
+ widest_int lb
|
|
+ = constant_lower_bound_with_limit (wi::to_poly_widest (min),
|
|
+ wi::to_widest (type_min));
|
|
+ min = wide_int_to_tree (TREE_TYPE (min), lb);
|
|
+ }
|
|
+ if (POLY_INT_CST_P (max))
|
|
+ {
|
|
+ tree type_max = vrp_val_max (TREE_TYPE (max), true);
|
|
+ widest_int ub
|
|
+ = constant_upper_bound_with_limit (wi::to_poly_widest (max),
|
|
+ wi::to_widest (type_max));
|
|
+ max = wide_int_to_tree (TREE_TYPE (max), ub);
|
|
+ }
|
|
+
|
|
/* Nothing to canonicalize for symbolic ranges. */
|
|
if (TREE_CODE (min) != INTEGER_CST
|
|
|| TREE_CODE (max) != INTEGER_CST)
|
|
{
|
|
- set (kind, min, max);
|
|
+ m_kind = kind;
|
|
+ m_min = min;
|
|
+ m_max = max;
|
|
return;
|
|
}
|
|
|
|
@@ -692,12 +733,13 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
|
|
kind = kind == VR_RANGE ? VR_ANTI_RANGE : VR_RANGE;
|
|
}
|
|
|
|
+ tree type = TREE_TYPE (min);
|
|
+
|
|
/* Anti-ranges that can be represented as ranges should be so. */
|
|
if (kind == VR_ANTI_RANGE)
|
|
{
|
|
/* For -fstrict-enums we may receive out-of-range ranges so consider
|
|
values < -INF and values > INF as -INF/INF as well. */
|
|
- tree type = TREE_TYPE (min);
|
|
bool is_min = (INTEGRAL_TYPE_P (type)
|
|
&& tree_int_cst_compare (min, TYPE_MIN_VALUE (type)) <= 0);
|
|
bool is_max = (INTEGRAL_TYPE_P (type)
|
|
@@ -740,22 +782,37 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
|
|
}
|
|
}
|
|
|
|
+ /* Normalize [MIN, MAX] into VARYING and ~[MIN, MAX] into UNDEFINED.
|
|
+
|
|
+ Avoid using TYPE_{MIN,MAX}_VALUE because -fstrict-enums can
|
|
+ restrict those to a subset of what actually fits in the type.
|
|
+ Instead use the extremes of the type precision which will allow
|
|
+ compare_range_with_value() to check if a value is inside a range,
|
|
+ whereas if we used TYPE_*_VAL, said function would just punt
|
|
+ upon seeing a VARYING. */
|
|
+ unsigned prec = TYPE_PRECISION (type);
|
|
+ signop sign = TYPE_SIGN (type);
|
|
+ if (wi::eq_p (wi::to_wide (min), wi::min_value (prec, sign))
|
|
+ && wi::eq_p (wi::to_wide (max), wi::max_value (prec, sign)))
|
|
+ {
|
|
+ if (kind == VR_RANGE)
|
|
+ set_varying ();
|
|
+ else if (kind == VR_ANTI_RANGE)
|
|
+ set_undefined ();
|
|
+ else
|
|
+ gcc_unreachable ();
|
|
+ return;
|
|
+ }
|
|
+
|
|
/* Do not drop [-INF(OVF), +INF(OVF)] to varying. (OVF) has to be sticky
|
|
to make sure VRP iteration terminates, otherwise we can get into
|
|
oscillations. */
|
|
|
|
- set (kind, min, max);
|
|
-}
|
|
-
|
|
-void
|
|
-value_range::set_and_canonicalize (enum value_range_kind kind,
|
|
- tree min, tree max, bitmap equiv)
|
|
-{
|
|
- value_range_base::set_and_canonicalize (kind, min, max);
|
|
- if (this->kind () == VR_RANGE || this->kind () == VR_ANTI_RANGE)
|
|
- set_equiv (equiv);
|
|
- else
|
|
- equiv_clear ();
|
|
+ m_kind = kind;
|
|
+ m_min = min;
|
|
+ m_max = max;
|
|
+ if (flag_checking)
|
|
+ check ();
|
|
}
|
|
|
|
void
|
|
@@ -776,32 +833,19 @@ value_range::set (tree val)
|
|
set (VR_RANGE, val, val, NULL);
|
|
}
|
|
|
|
-/* Set value range VR to a non-NULL range of type TYPE. */
|
|
+/* Set value range VR to a nonzero range of type TYPE. */
|
|
|
|
void
|
|
-value_range_base::set_nonnull (tree type)
|
|
+value_range_base::set_nonzero (tree type)
|
|
{
|
|
tree zero = build_int_cst (type, 0);
|
|
set (VR_ANTI_RANGE, zero, zero);
|
|
}
|
|
|
|
-void
|
|
-value_range::set_nonnull (tree type)
|
|
-{
|
|
- tree zero = build_int_cst (type, 0);
|
|
- set (VR_ANTI_RANGE, zero, zero, NULL);
|
|
-}
|
|
-
|
|
-/* Set value range VR to a NULL range of type TYPE. */
|
|
+/* Set value range VR to a ZERO range of type TYPE. */
|
|
|
|
void
|
|
-value_range_base::set_null (tree type)
|
|
-{
|
|
- set (build_int_cst (type, 0));
|
|
-}
|
|
-
|
|
-void
|
|
-value_range::set_null (tree type)
|
|
+value_range_base::set_zero (tree type)
|
|
{
|
|
set (build_int_cst (type, 0));
|
|
}
|
|
@@ -830,22 +874,6 @@ vrp_bitmap_equal_p (const_bitmap b1, const_bitmap b2)
|
|
&& bitmap_equal_p (b1, b2)));
|
|
}
|
|
|
|
-/* Return true if VR is [0, 0]. */
|
|
-
|
|
-static inline bool
|
|
-range_is_null (const value_range_base *vr)
|
|
-{
|
|
- return vr->zero_p ();
|
|
-}
|
|
-
|
|
-static inline bool
|
|
-range_is_nonnull (const value_range_base *vr)
|
|
-{
|
|
- return (vr->kind () == VR_ANTI_RANGE
|
|
- && vr->min () == vr->max ()
|
|
- && integer_zerop (vr->min ()));
|
|
-}
|
|
-
|
|
/* Return true if max and min of VR are INTEGER_CST. It's not necessary
|
|
a singleton. */
|
|
|
|
@@ -949,22 +977,17 @@ operand_less_p (tree val, tree val2)
|
|
/* LT is folded faster than GE and others. Inline the common case. */
|
|
if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
|
|
return tree_int_cst_lt (val, val2);
|
|
+ else if (TREE_CODE (val) == SSA_NAME && TREE_CODE (val2) == SSA_NAME)
|
|
+ return val == val2 ? 0 : -2;
|
|
else
|
|
{
|
|
- tree tcmp;
|
|
-
|
|
- fold_defer_overflow_warnings ();
|
|
-
|
|
- tcmp = fold_binary_to_constant (LT_EXPR, boolean_type_node, val, val2);
|
|
-
|
|
- fold_undefer_and_ignore_overflow_warnings ();
|
|
-
|
|
- if (!tcmp
|
|
- || TREE_CODE (tcmp) != INTEGER_CST)
|
|
- return -2;
|
|
-
|
|
- if (!integer_zerop (tcmp))
|
|
+ int cmp = compare_values (val, val2);
|
|
+ if (cmp == -1)
|
|
return 1;
|
|
+ else if (cmp == 0 || cmp == 1)
|
|
+ return 0;
|
|
+ else
|
|
+ return -2;
|
|
}
|
|
|
|
return 0;
|
|
@@ -998,8 +1021,8 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p)
|
|
|
|
/* Convert the two values into the same type. This is needed because
|
|
sizetype causes sign extension even for unsigned types. */
|
|
- val2 = fold_convert (TREE_TYPE (val1), val2);
|
|
- STRIP_USELESS_TYPE_CONVERSION (val2);
|
|
+ if (!useless_type_conversion_p (TREE_TYPE (val1), TREE_TYPE (val2)))
|
|
+ val2 = fold_convert (TREE_TYPE (val1), val2);
|
|
|
|
const bool overflow_undefined
|
|
= INTEGRAL_TYPE_P (TREE_TYPE (val1))
|
|
@@ -1107,32 +1130,43 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p)
|
|
}
|
|
else
|
|
{
|
|
- tree t;
|
|
+ if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
|
|
+ {
|
|
+ /* We cannot compare overflowed values. */
|
|
+ if (TREE_OVERFLOW (val1) || TREE_OVERFLOW (val2))
|
|
+ return -2;
|
|
+
|
|
+ return tree_int_cst_compare (val1, val2);
|
|
+ }
|
|
|
|
/* First see if VAL1 and VAL2 are not the same. */
|
|
- if (val1 == val2 || operand_equal_p (val1, val2, 0))
|
|
+ if (operand_equal_p (val1, val2, 0))
|
|
return 0;
|
|
|
|
+ fold_defer_overflow_warnings ();
|
|
+
|
|
/* If VAL1 is a lower address than VAL2, return -1. */
|
|
- if (operand_less_p (val1, val2) == 1)
|
|
- return -1;
|
|
+ tree t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val1, val2);
|
|
+ if (t && integer_onep (t))
|
|
+ {
|
|
+ fold_undefer_and_ignore_overflow_warnings ();
|
|
+ return -1;
|
|
+ }
|
|
|
|
/* If VAL1 is a higher address than VAL2, return +1. */
|
|
- if (operand_less_p (val2, val1) == 1)
|
|
- return 1;
|
|
-
|
|
- /* If VAL1 is different than VAL2, return +2.
|
|
- For integer constants we either have already returned -1 or 1
|
|
- or they are equivalent. We still might succeed in proving
|
|
- something about non-trivial operands. */
|
|
- if (TREE_CODE (val1) != INTEGER_CST
|
|
- || TREE_CODE (val2) != INTEGER_CST)
|
|
+ t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val2, val1);
|
|
+ if (t && integer_onep (t))
|
|
{
|
|
- t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
|
|
- if (t && integer_onep (t))
|
|
- return 2;
|
|
+ fold_undefer_and_ignore_overflow_warnings ();
|
|
+ return 1;
|
|
}
|
|
|
|
+ /* If VAL1 is different than VAL2, return +2. */
|
|
+ t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
|
|
+ fold_undefer_and_ignore_overflow_warnings ();
|
|
+ if (t && integer_onep (t))
|
|
+ return 2;
|
|
+
|
|
return -2;
|
|
}
|
|
}
|
|
@@ -1231,7 +1265,8 @@ vrp_set_zero_nonzero_bits (const tree expr_type,
|
|
|
|
static bool
|
|
ranges_from_anti_range (const value_range_base *ar,
|
|
- value_range_base *vr0, value_range_base *vr1)
|
|
+ value_range_base *vr0, value_range_base *vr1,
|
|
+ bool handle_pointers)
|
|
{
|
|
tree type = ar->type ();
|
|
|
|
@@ -1244,18 +1279,18 @@ ranges_from_anti_range (const value_range_base *ar,
|
|
if (ar->kind () != VR_ANTI_RANGE
|
|
|| TREE_CODE (ar->min ()) != INTEGER_CST
|
|
|| TREE_CODE (ar->max ()) != INTEGER_CST
|
|
- || !vrp_val_min (type)
|
|
- || !vrp_val_max (type))
|
|
+ || !vrp_val_min (type, handle_pointers)
|
|
+ || !vrp_val_max (type, handle_pointers))
|
|
return false;
|
|
|
|
- if (tree_int_cst_lt (vrp_val_min (type), ar->min ()))
|
|
+ if (tree_int_cst_lt (vrp_val_min (type, handle_pointers), ar->min ()))
|
|
vr0->set (VR_RANGE,
|
|
- vrp_val_min (type),
|
|
+ vrp_val_min (type, handle_pointers),
|
|
wide_int_to_tree (type, wi::to_wide (ar->min ()) - 1));
|
|
- if (tree_int_cst_lt (ar->max (), vrp_val_max (type)))
|
|
+ if (tree_int_cst_lt (ar->max (), vrp_val_max (type, handle_pointers)))
|
|
vr1->set (VR_RANGE,
|
|
wide_int_to_tree (type, wi::to_wide (ar->max ()) + 1),
|
|
- vrp_val_max (type));
|
|
+ vrp_val_max (type, handle_pointers));
|
|
if (vr0->undefined_p ())
|
|
{
|
|
*vr0 = *vr1;
|
|
@@ -1266,21 +1301,20 @@ ranges_from_anti_range (const value_range_base *ar,
|
|
}
|
|
|
|
/* Extract the components of a value range into a pair of wide ints in
|
|
- [WMIN, WMAX].
|
|
-
|
|
- If the value range is anything but a VR_*RANGE of constants, the
|
|
- resulting wide ints are set to [-MIN, +MAX] for the type. */
|
|
+ [WMIN, WMAX], after having normalized any symbolics from the input. */
|
|
|
|
static void inline
|
|
-extract_range_into_wide_ints (const value_range_base *vr,
|
|
- signop sign, unsigned prec,
|
|
- wide_int &wmin, wide_int &wmax)
|
|
+extract_range_into_wide_ints (const value_range_base *vr_,
|
|
+ tree type, wide_int &wmin, wide_int &wmax)
|
|
{
|
|
- gcc_assert (vr->kind () != VR_ANTI_RANGE || vr->symbolic_p ());
|
|
- if (range_int_cst_p (vr))
|
|
+ signop sign = TYPE_SIGN (type);
|
|
+ unsigned int prec = TYPE_PRECISION (type);
|
|
+ gcc_assert (vr_->kind () != VR_ANTI_RANGE || vr_->symbolic_p ());
|
|
+ value_range vr = vr_->normalize_symbolics ();
|
|
+ if (range_int_cst_p (&vr))
|
|
{
|
|
- wmin = wi::to_wide (vr->min ());
|
|
- wmax = wi::to_wide (vr->max ());
|
|
+ wmin = wi::to_wide (vr.min ());
|
|
+ wmax = wi::to_wide (vr.max ());
|
|
}
|
|
else
|
|
{
|
|
@@ -1295,7 +1329,7 @@ extract_range_into_wide_ints (const value_range_base *vr,
|
|
|
|
static void
|
|
extract_range_from_multiplicative_op (value_range_base *vr,
|
|
- enum tree_code code,
|
|
+ enum tree_code code, tree type,
|
|
const value_range_base *vr0,
|
|
const value_range_base *vr1)
|
|
{
|
|
@@ -1307,13 +1341,31 @@ extract_range_from_multiplicative_op (value_range_base *vr,
|
|
|| code == ROUND_DIV_EXPR
|
|
|| code == RSHIFT_EXPR
|
|
|| code == LSHIFT_EXPR);
|
|
- gcc_assert (vr0->kind () == VR_RANGE
|
|
- && vr0->kind () == vr1->kind ());
|
|
+ if (!range_int_cst_p (vr1))
|
|
+ {
|
|
+ vr->set_varying ();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Even if vr0 is VARYING or otherwise not usable, we can derive
|
|
+ useful ranges just from the shift count. E.g.
|
|
+ x >> 63 for signed 64-bit x is always [-1, 0]. */
|
|
+ value_range_base tem = vr0->normalize_symbolics ();
|
|
+ tree vr0_min, vr0_max;
|
|
+ if (tem.kind () == VR_RANGE)
|
|
+ {
|
|
+ vr0_min = tem.min ();
|
|
+ vr0_max = tem.max ();
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ vr0_min = vrp_val_min (type);
|
|
+ vr0_max = vrp_val_max (type);
|
|
+ }
|
|
|
|
- tree type = vr0->type ();
|
|
wide_int res_lb, res_ub;
|
|
- wide_int vr0_lb = wi::to_wide (vr0->min ());
|
|
- wide_int vr0_ub = wi::to_wide (vr0->max ());
|
|
+ wide_int vr0_lb = wi::to_wide (vr0_min);
|
|
+ wide_int vr0_ub = wi::to_wide (vr0_max);
|
|
wide_int vr1_lb = wi::to_wide (vr1->min ());
|
|
wide_int vr1_ub = wi::to_wide (vr1->max ());
|
|
bool overflow_undefined = TYPE_OVERFLOW_UNDEFINED (type);
|
|
@@ -1323,9 +1375,8 @@ extract_range_from_multiplicative_op (value_range_base *vr,
|
|
code, TYPE_SIGN (type), prec,
|
|
vr0_lb, vr0_ub, vr1_lb, vr1_ub,
|
|
overflow_undefined))
|
|
- vr->set_and_canonicalize (VR_RANGE,
|
|
- wide_int_to_tree (type, res_lb),
|
|
- wide_int_to_tree (type, res_ub));
|
|
+ vr->set (VR_RANGE, wide_int_to_tree (type, res_lb),
|
|
+ wide_int_to_tree (type, res_ub));
|
|
else
|
|
vr->set_varying ();
|
|
}
|
|
@@ -1583,9 +1634,9 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
code is EXACT_DIV_EXPR. We could mask out bits in the resulting
|
|
range, but then we also need to hack up vrp_union. It's just
|
|
easier to special case when vr0 is ~[0,0] for EXACT_DIV_EXPR. */
|
|
- if (code == EXACT_DIV_EXPR && range_is_nonnull (&vr0))
|
|
+ if (code == EXACT_DIV_EXPR && vr0.nonzero_p ())
|
|
{
|
|
- vr->set_nonnull (expr_type);
|
|
+ vr->set_nonzero (expr_type);
|
|
return;
|
|
}
|
|
|
|
@@ -1663,9 +1714,9 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
If both are null, then the result is null. Otherwise they
|
|
are varying. */
|
|
if (!range_includes_zero_p (&vr0) && !range_includes_zero_p (&vr1))
|
|
- vr->set_nonnull (expr_type);
|
|
- else if (range_is_null (&vr0) && range_is_null (&vr1))
|
|
- vr->set_null (expr_type);
|
|
+ vr->set_nonzero (expr_type);
|
|
+ else if (vr0.zero_p () && vr1.zero_p ())
|
|
+ vr->set_zero (expr_type);
|
|
else
|
|
vr->set_varying ();
|
|
}
|
|
@@ -1692,9 +1743,9 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
&& (flag_delete_null_pointer_checks
|
|
|| (range_int_cst_p (&vr1)
|
|
&& !tree_int_cst_sign_bit (vr1.max ()))))
|
|
- vr->set_nonnull (expr_type);
|
|
- else if (range_is_null (&vr0) && range_is_null (&vr1))
|
|
- vr->set_null (expr_type);
|
|
+ vr->set_nonzero (expr_type);
|
|
+ else if (vr0.zero_p () && vr1.zero_p ())
|
|
+ vr->set_zero (expr_type);
|
|
else
|
|
vr->set_varying ();
|
|
}
|
|
@@ -1702,8 +1753,8 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
{
|
|
/* For pointer types, we are really only interested in asserting
|
|
whether the expression evaluates to non-NULL. */
|
|
- if (range_is_null (&vr0) || range_is_null (&vr1))
|
|
- vr->set_null (expr_type);
|
|
+ if (vr0.zero_p () || vr1.zero_p ())
|
|
+ vr->set_zero (expr_type);
|
|
else
|
|
vr->set_varying ();
|
|
}
|
|
@@ -1717,19 +1768,30 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
range and see what we end up with. */
|
|
if (code == PLUS_EXPR || code == MINUS_EXPR)
|
|
{
|
|
+ value_range_kind vr0_kind = vr0.kind (), vr1_kind = vr1.kind ();
|
|
+ tree vr0_min = vr0.min (), vr0_max = vr0.max ();
|
|
+ tree vr1_min = vr1.min (), vr1_max = vr1.max ();
|
|
/* This will normalize things such that calculating
|
|
[0,0] - VR_VARYING is not dropped to varying, but is
|
|
calculated as [MIN+1, MAX]. */
|
|
if (vr0.varying_p ())
|
|
- vr0.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type));
|
|
+ {
|
|
+ vr0_kind = VR_RANGE;
|
|
+ vr0_min = vrp_val_min (expr_type);
|
|
+ vr0_max = vrp_val_max (expr_type);
|
|
+ }
|
|
if (vr1.varying_p ())
|
|
- vr1.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type));
|
|
+ {
|
|
+ vr1_kind = VR_RANGE;
|
|
+ vr1_min = vrp_val_min (expr_type);
|
|
+ vr1_max = vrp_val_max (expr_type);
|
|
+ }
|
|
|
|
const bool minus_p = (code == MINUS_EXPR);
|
|
- tree min_op0 = vr0.min ();
|
|
- tree min_op1 = minus_p ? vr1.max () : vr1.min ();
|
|
- tree max_op0 = vr0.max ();
|
|
- tree max_op1 = minus_p ? vr1.min () : vr1.max ();
|
|
+ tree min_op0 = vr0_min;
|
|
+ tree min_op1 = minus_p ? vr1_max : vr1_min;
|
|
+ tree max_op0 = vr0_max;
|
|
+ tree max_op1 = minus_p ? vr1_min : vr1_max;
|
|
tree sym_min_op0 = NULL_TREE;
|
|
tree sym_min_op1 = NULL_TREE;
|
|
tree sym_max_op0 = NULL_TREE;
|
|
@@ -1742,7 +1804,7 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
single-symbolic ranges, try to compute the precise resulting range,
|
|
but only if we know that this resulting range will also be constant
|
|
or single-symbolic. */
|
|
- if (vr0.kind () == VR_RANGE && vr1.kind () == VR_RANGE
|
|
+ if (vr0_kind == VR_RANGE && vr1_kind == VR_RANGE
|
|
&& (TREE_CODE (min_op0) == INTEGER_CST
|
|
|| (sym_min_op0
|
|
= get_single_symbol (min_op0, &neg_min_op0, &min_op0)))
|
|
@@ -1823,8 +1885,8 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
wide_int wmin, wmax;
|
|
wide_int vr0_min, vr0_max;
|
|
wide_int vr1_min, vr1_max;
|
|
- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
|
|
- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
|
|
+ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
|
|
+ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
|
|
if (wide_int_range_min_max (wmin, wmax, code, sign, prec,
|
|
vr0_min, vr0_max, vr1_min, vr1_max))
|
|
vr->set (VR_RANGE, wide_int_to_tree (expr_type, wmin),
|
|
@@ -1841,7 +1903,7 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
vr->set_varying ();
|
|
return;
|
|
}
|
|
- extract_range_from_multiplicative_op (vr, code, &vr0, &vr1);
|
|
+ extract_range_from_multiplicative_op (vr, code, expr_type, &vr0, &vr1);
|
|
return;
|
|
}
|
|
else if (code == RSHIFT_EXPR
|
|
@@ -1856,13 +1918,8 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
{
|
|
if (code == RSHIFT_EXPR)
|
|
{
|
|
- /* Even if vr0 is VARYING or otherwise not usable, we can derive
|
|
- useful ranges just from the shift count. E.g.
|
|
- x >> 63 for signed 64-bit x is always [-1, 0]. */
|
|
- if (vr0.kind () != VR_RANGE || vr0.symbolic_p ())
|
|
- vr0.set (VR_RANGE, vrp_val_min (expr_type),
|
|
- vrp_val_max (expr_type));
|
|
- extract_range_from_multiplicative_op (vr, code, &vr0, &vr1);
|
|
+ extract_range_from_multiplicative_op (vr, code, expr_type,
|
|
+ &vr0, &vr1);
|
|
return;
|
|
}
|
|
else if (code == LSHIFT_EXPR
|
|
@@ -1878,7 +1935,7 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
{
|
|
min = wide_int_to_tree (expr_type, res_lb);
|
|
max = wide_int_to_tree (expr_type, res_ub);
|
|
- vr->set_and_canonicalize (VR_RANGE, min, max);
|
|
+ vr->set (VR_RANGE, min, max);
|
|
return;
|
|
}
|
|
}
|
|
@@ -1897,7 +1954,7 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
bool extra_range_p;
|
|
|
|
/* Special case explicit division by zero as undefined. */
|
|
- if (range_is_null (&vr1))
|
|
+ if (vr1.zero_p ())
|
|
{
|
|
vr->set_undefined ();
|
|
return;
|
|
@@ -1910,9 +1967,9 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
NOTE: As a future improvement, we may be able to do better
|
|
with mixed symbolic (anti-)ranges like [0, A]. See note in
|
|
ranges_from_anti_range. */
|
|
- extract_range_into_wide_ints (&vr0, sign, prec,
|
|
+ extract_range_into_wide_ints (&vr0, expr_type,
|
|
dividend_min, dividend_max);
|
|
- extract_range_into_wide_ints (&vr1, sign, prec,
|
|
+ extract_range_into_wide_ints (&vr1, expr_type,
|
|
divisor_min, divisor_max);
|
|
if (!wide_int_range_div (wmin, wmax, code, sign, prec,
|
|
dividend_min, dividend_max,
|
|
@@ -1936,15 +1993,15 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
}
|
|
else if (code == TRUNC_MOD_EXPR)
|
|
{
|
|
- if (range_is_null (&vr1))
|
|
+ if (vr1.zero_p ())
|
|
{
|
|
vr->set_undefined ();
|
|
return;
|
|
}
|
|
wide_int wmin, wmax, tmp;
|
|
wide_int vr0_min, vr0_max, vr1_min, vr1_max;
|
|
- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
|
|
- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
|
|
+ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
|
|
+ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
|
|
wide_int_range_trunc_mod (wmin, wmax, sign, prec,
|
|
vr0_min, vr0_max, vr1_min, vr1_max);
|
|
min = wide_int_to_tree (expr_type, wmin);
|
|
@@ -1962,8 +2019,8 @@ extract_range_from_binary_expr (value_range_base *vr,
|
|
&may_be_nonzero0, &must_be_nonzero0);
|
|
vrp_set_zero_nonzero_bits (expr_type, &vr1,
|
|
&may_be_nonzero1, &must_be_nonzero1);
|
|
- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
|
|
- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
|
|
+ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
|
|
+ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
|
|
if (code == BIT_AND_EXPR)
|
|
{
|
|
if (wide_int_range_bit_and (wmin, wmax, sign, prec,
|
|
@@ -2140,9 +2197,9 @@ extract_range_from_unary_expr (value_range_base *vr,
|
|
if (POINTER_TYPE_P (type) || POINTER_TYPE_P (op0_type))
|
|
{
|
|
if (!range_includes_zero_p (&vr0))
|
|
- vr->set_nonnull (type);
|
|
- else if (range_is_null (&vr0))
|
|
- vr->set_null (type);
|
|
+ vr->set_nonzero (type);
|
|
+ else if (vr0.zero_p ())
|
|
+ vr->set_zero (type);
|
|
else
|
|
vr->set_varying ();
|
|
return;
|
|
@@ -2167,8 +2224,7 @@ extract_range_from_unary_expr (value_range_base *vr,
|
|
signop outer_sign = TYPE_SIGN (outer_type);
|
|
unsigned inner_prec = TYPE_PRECISION (inner_type);
|
|
unsigned outer_prec = TYPE_PRECISION (outer_type);
|
|
- extract_range_into_wide_ints (&vr0, inner_sign, inner_prec,
|
|
- vr0_min, vr0_max);
|
|
+ extract_range_into_wide_ints (&vr0, inner_type, vr0_min, vr0_max);
|
|
if (wide_int_range_convert (wmin, wmax,
|
|
inner_sign, inner_prec,
|
|
outer_sign, outer_prec,
|
|
@@ -2176,7 +2232,7 @@ extract_range_from_unary_expr (value_range_base *vr,
|
|
{
|
|
tree min = wide_int_to_tree (outer_type, wmin);
|
|
tree max = wide_int_to_tree (outer_type, wmax);
|
|
- vr->set_and_canonicalize (VR_RANGE, min, max);
|
|
+ vr->set (VR_RANGE, min, max);
|
|
}
|
|
else
|
|
vr->set_varying ();
|
|
@@ -2186,7 +2242,7 @@ extract_range_from_unary_expr (value_range_base *vr,
|
|
{
|
|
wide_int wmin, wmax;
|
|
wide_int vr0_min, vr0_max;
|
|
- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
|
|
+ extract_range_into_wide_ints (&vr0, type, vr0_min, vr0_max);
|
|
if (wide_int_range_abs (wmin, wmax, sign, prec, vr0_min, vr0_max,
|
|
TYPE_OVERFLOW_UNDEFINED (type)))
|
|
vr->set (VR_RANGE, wide_int_to_tree (type, wmin),
|
|
@@ -2199,7 +2255,8 @@ extract_range_from_unary_expr (value_range_base *vr,
|
|
{
|
|
wide_int wmin, wmax;
|
|
wide_int vr0_min, vr0_max;
|
|
- extract_range_into_wide_ints (&vr0, SIGNED, prec, vr0_min, vr0_max);
|
|
+ tree signed_type = make_signed_type (TYPE_PRECISION (type));
|
|
+ extract_range_into_wide_ints (&vr0, signed_type, vr0_min, vr0_max);
|
|
wide_int_range_absu (wmin, wmax, prec, vr0_min, vr0_max);
|
|
vr->set (VR_RANGE, wide_int_to_tree (type, wmin),
|
|
wide_int_to_tree (type, wmax));
|
|
@@ -5468,8 +5525,10 @@ union_ranges (enum value_range_kind *vr0type,
|
|
enum value_range_kind vr1type,
|
|
tree vr1min, tree vr1max)
|
|
{
|
|
- bool mineq = vrp_operand_equal_p (*vr0min, vr1min);
|
|
- bool maxeq = vrp_operand_equal_p (*vr0max, vr1max);
|
|
+ int cmpmin = compare_values (*vr0min, vr1min);
|
|
+ int cmpmax = compare_values (*vr0max, vr1max);
|
|
+ bool mineq = cmpmin == 0;
|
|
+ bool maxeq = cmpmax == 0;
|
|
|
|
/* [] is vr0, () is vr1 in the following classification comments. */
|
|
if (mineq && maxeq)
|
|
@@ -5569,8 +5628,8 @@ union_ranges (enum value_range_kind *vr0type,
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
- else if ((maxeq || operand_less_p (vr1max, *vr0max) == 1)
|
|
- && (mineq || operand_less_p (*vr0min, vr1min) == 1))
|
|
+ else if ((maxeq || cmpmax == 1)
|
|
+ && (mineq || cmpmin == -1))
|
|
{
|
|
/* [ ( ) ] or [( ) ] or [ ( )] */
|
|
if (*vr0type == VR_RANGE
|
|
@@ -5603,8 +5662,8 @@ union_ranges (enum value_range_kind *vr0type,
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
- else if ((maxeq || operand_less_p (*vr0max, vr1max) == 1)
|
|
- && (mineq || operand_less_p (vr1min, *vr0min) == 1))
|
|
+ else if ((maxeq || cmpmax == -1)
|
|
+ && (mineq || cmpmin == 1))
|
|
{
|
|
/* ( [ ] ) or ([ ] ) or ( [ ]) */
|
|
if (*vr0type == VR_RANGE
|
|
@@ -5643,10 +5702,10 @@ union_ranges (enum value_range_kind *vr0type,
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
- else if ((operand_less_p (vr1min, *vr0max) == 1
|
|
- || operand_equal_p (vr1min, *vr0max, 0))
|
|
- && operand_less_p (*vr0min, vr1min) == 1
|
|
- && operand_less_p (*vr0max, vr1max) == 1)
|
|
+ else if (cmpmin == -1
|
|
+ && cmpmax == -1
|
|
+ && (operand_less_p (vr1min, *vr0max) == 1
|
|
+ || operand_equal_p (vr1min, *vr0max, 0)))
|
|
{
|
|
/* [ ( ] ) or [ ]( ) */
|
|
if (*vr0type == VR_RANGE
|
|
@@ -5680,10 +5739,10 @@ union_ranges (enum value_range_kind *vr0type,
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
- else if ((operand_less_p (*vr0min, vr1max) == 1
|
|
- || operand_equal_p (*vr0min, vr1max, 0))
|
|
- && operand_less_p (vr1min, *vr0min) == 1
|
|
- && operand_less_p (vr1max, *vr0max) == 1)
|
|
+ else if (cmpmin == 1
|
|
+ && cmpmax == 1
|
|
+ && (operand_less_p (*vr0min, vr1max) == 1
|
|
+ || operand_equal_p (*vr0min, vr1max, 0)))
|
|
{
|
|
/* ( [ ) ] or ( )[ ] */
|
|
if (*vr0type == VR_RANGE
|
|
@@ -6083,7 +6142,7 @@ value_range::intersect_helper (value_range *vr0, const value_range *vr1)
|
|
VR_RANGE can still be a VR_RANGE. Work on a temporary so we can
|
|
fall back to vr0 when this turns things to varying. */
|
|
value_range tem;
|
|
- tem.set_and_canonicalize (vr0type, vr0min, vr0max);
|
|
+ tem.set (vr0type, vr0min, vr0max);
|
|
/* If that failed, use the saved original VR0. */
|
|
if (tem.varying_p ())
|
|
return;
|
|
@@ -6152,8 +6211,8 @@ value_range_base::union_helper (const value_range_base *vr0,
|
|
vr1->kind (), vr1->min (), vr1->max ());
|
|
|
|
/* Work on a temporary so we can still use vr0 when union returns varying. */
|
|
- value_range tem;
|
|
- tem.set_and_canonicalize (vr0type, vr0min, vr0max);
|
|
+ value_range_base tem;
|
|
+ tem.set (vr0type, vr0min, vr0max);
|
|
|
|
/* Failed to find an efficient meet. Before giving up and setting
|
|
the result to VARYING, see if we can at least derive a useful
|
|
@@ -6162,7 +6221,7 @@ value_range_base::union_helper (const value_range_base *vr0,
|
|
&& range_includes_zero_p (vr0) == 0
|
|
&& range_includes_zero_p (vr1) == 0)
|
|
{
|
|
- tem.set_nonnull (vr0->type ());
|
|
+ tem.set_nonzero (vr0->type ());
|
|
return tem;
|
|
}
|
|
|
|
@@ -6233,6 +6292,58 @@ value_range::union_ (const value_range *other)
|
|
}
|
|
}
|
|
|
|
+/* Normalize symbolics into constants. */
|
|
+
|
|
+value_range_base
|
|
+value_range_base::normalize_symbolics () const
|
|
+{
|
|
+ if (varying_p () || undefined_p ())
|
|
+ return *this;
|
|
+ tree ttype = type ();
|
|
+ bool min_symbolic = !is_gimple_min_invariant (min ());
|
|
+ bool max_symbolic = !is_gimple_min_invariant (max ());
|
|
+ if (!min_symbolic && !max_symbolic)
|
|
+ return *this;
|
|
+
|
|
+ // [SYM, SYM] -> VARYING
|
|
+ if (min_symbolic && max_symbolic)
|
|
+ {
|
|
+ value_range_base var;
|
|
+ var.set_varying ();
|
|
+ return var;
|
|
+ }
|
|
+ if (kind () == VR_RANGE)
|
|
+ {
|
|
+ // [SYM, NUM] -> [-MIN, NUM]
|
|
+ if (min_symbolic)
|
|
+ return value_range_base (VR_RANGE, vrp_val_min (ttype), max ());
|
|
+ // [NUM, SYM] -> [NUM, +MAX]
|
|
+ return value_range_base (VR_RANGE, min (), vrp_val_max (ttype));
|
|
+ }
|
|
+ gcc_assert (kind () == VR_ANTI_RANGE);
|
|
+ // ~[SYM, NUM] -> [NUM + 1, +MAX]
|
|
+ if (min_symbolic)
|
|
+ {
|
|
+ if (!vrp_val_is_max (max ()))
|
|
+ {
|
|
+ tree n = wide_int_to_tree (ttype, wi::to_wide (max ()) + 1);
|
|
+ return value_range_base (VR_RANGE, n, vrp_val_max (ttype));
|
|
+ }
|
|
+ value_range_base var;
|
|
+ var.set_varying ();
|
|
+ return var;
|
|
+ }
|
|
+ // ~[NUM, SYM] -> [-MIN, NUM - 1]
|
|
+ if (!vrp_val_is_min (min ()))
|
|
+ {
|
|
+ tree n = wide_int_to_tree (ttype, wi::to_wide (min ()) - 1);
|
|
+ return value_range_base (VR_RANGE, vrp_val_min (ttype), n);
|
|
+ }
|
|
+ value_range_base var;
|
|
+ var.set_varying ();
|
|
+ return var;
|
|
+}
|
|
+
|
|
/* Visit all arguments for PHI node PHI that flow through executable
|
|
edges. If a valid value range can be derived from all the incoming
|
|
value ranges, set a new range for the LHS of PHI. */
|
|
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
|
|
index 9d52b428d05..4bcff924b58 100644
|
|
--- a/gcc/tree-vrp.h
|
|
+++ b/gcc/tree-vrp.h
|
|
@@ -46,8 +46,8 @@ public:
|
|
|
|
void set (value_range_kind, tree, tree);
|
|
void set (tree);
|
|
- void set_nonnull (tree);
|
|
- void set_null (tree);
|
|
+ void set_nonzero (tree);
|
|
+ void set_zero (tree);
|
|
|
|
enum value_range_kind kind () const;
|
|
tree min () const;
|
|
@@ -70,11 +70,13 @@ public:
|
|
/* Misc methods. */
|
|
tree type () const;
|
|
bool may_contain_p (tree) const;
|
|
- void set_and_canonicalize (enum value_range_kind, tree, tree);
|
|
bool zero_p () const;
|
|
+ bool nonzero_p () const;
|
|
bool singleton_p (tree *result = NULL) const;
|
|
void dump (FILE *) const;
|
|
|
|
+ value_range_base normalize_symbolics () const;
|
|
+
|
|
protected:
|
|
void check ();
|
|
static value_range_base union_helper (const value_range_base *,
|
|
@@ -118,8 +120,6 @@ class GTY((user)) value_range : public value_range_base
|
|
/* Deep-copies equiv bitmap argument. */
|
|
void set (value_range_kind, tree, tree, bitmap = NULL);
|
|
void set (tree);
|
|
- void set_nonnull (tree);
|
|
- void set_null (tree);
|
|
|
|
bool operator== (const value_range &) const /* = delete */;
|
|
bool operator!= (const value_range &) const /* = delete */;
|
|
@@ -138,7 +138,6 @@ class GTY((user)) value_range : public value_range_base
|
|
|
|
/* Misc methods. */
|
|
void deep_copy (const value_range *);
|
|
- void set_and_canonicalize (enum value_range_kind, tree, tree, bitmap = NULL);
|
|
void dump (FILE *) const;
|
|
|
|
private:
|
|
@@ -222,6 +221,16 @@ value_range_base::zero_p () const
|
|
&& integer_zerop (m_max));
|
|
}
|
|
|
|
+/* Return TRUE if range is nonzero. */
|
|
+
|
|
+inline bool
|
|
+value_range_base::nonzero_p () const
|
|
+{
|
|
+ return (m_kind == VR_ANTI_RANGE
|
|
+ && integer_zerop (m_min)
|
|
+ && integer_zerop (m_max));
|
|
+}
|
|
+
|
|
extern void dump_value_range (FILE *, const value_range *);
|
|
extern void dump_value_range (FILE *, const value_range_base *);
|
|
|
|
@@ -259,8 +268,8 @@ extern bool vrp_val_is_min (const_tree);
|
|
extern bool vrp_val_is_max (const_tree);
|
|
extern int value_inside_range (tree, tree, tree);
|
|
|
|
-extern tree vrp_val_min (const_tree);
|
|
-extern tree vrp_val_max (const_tree);
|
|
+extern tree vrp_val_min (const_tree, bool handle_pointers = false);
|
|
+extern tree vrp_val_max (const_tree, bool handle_pointers = false);
|
|
|
|
extern void extract_range_from_unary_expr (value_range_base *vr,
|
|
enum tree_code code,
|
|
diff --git a/gcc/tree.c b/gcc/tree.c
|
|
index 32e94e48132..c4b8eea675f 100644
|
|
--- a/gcc/tree.c
|
|
+++ b/gcc/tree.c
|
|
@@ -8213,8 +8213,6 @@ build_nonstandard_integer_type (unsigned HOST_WIDE_INT precision,
|
|
else
|
|
fixup_signed_type (itype);
|
|
|
|
- ret = itype;
|
|
-
|
|
inchash::hash hstate;
|
|
inchash::add_expr (TYPE_MAX_VALUE (itype), hstate);
|
|
ret = type_hash_canon (hstate.end (), itype);
|
|
@@ -11079,44 +11077,44 @@ build_vector_type (tree innertype, poly_int64 nunits)
|
|
return make_vector_type (innertype, nunits, VOIDmode);
|
|
}
|
|
|
|
-/* Build truth vector with specified length and number of units. */
|
|
+/* Build a truth vector with NUNITS units, giving it mode MASK_MODE. */
|
|
|
|
tree
|
|
-build_truth_vector_type (poly_uint64 nunits, poly_uint64 vector_size)
|
|
+build_truth_vector_type_for_mode (poly_uint64 nunits, machine_mode mask_mode)
|
|
{
|
|
- machine_mode mask_mode
|
|
- = targetm.vectorize.get_mask_mode (nunits, vector_size).else_blk ();
|
|
-
|
|
- poly_uint64 vsize;
|
|
- if (mask_mode == BLKmode)
|
|
- vsize = vector_size * BITS_PER_UNIT;
|
|
- else
|
|
- vsize = GET_MODE_BITSIZE (mask_mode);
|
|
+ gcc_assert (mask_mode != BLKmode);
|
|
|
|
+ poly_uint64 vsize = GET_MODE_BITSIZE (mask_mode);
|
|
unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits);
|
|
-
|
|
tree bool_type = build_nonstandard_boolean_type (esize);
|
|
|
|
return make_vector_type (bool_type, nunits, mask_mode);
|
|
}
|
|
|
|
-/* Returns a vector type corresponding to a comparison of VECTYPE. */
|
|
+/* Build a vector type that holds one boolean result for each element of
|
|
+ vector type VECTYPE. The public interface for this operation is
|
|
+ truth_type_for. */
|
|
|
|
-tree
|
|
-build_same_sized_truth_vector_type (tree vectype)
|
|
+static tree
|
|
+build_truth_vector_type_for (tree vectype)
|
|
{
|
|
- if (VECTOR_BOOLEAN_TYPE_P (vectype))
|
|
- return vectype;
|
|
+ machine_mode vector_mode = TYPE_MODE (vectype);
|
|
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
|
|
|
- poly_uint64 size = GET_MODE_SIZE (TYPE_MODE (vectype));
|
|
+ machine_mode mask_mode;
|
|
+ if (VECTOR_MODE_P (vector_mode)
|
|
+ && targetm.vectorize.get_mask_mode (vector_mode).exists (&mask_mode))
|
|
+ return build_truth_vector_type_for_mode (nunits, mask_mode);
|
|
|
|
- if (known_eq (size, 0U))
|
|
- size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
|
|
+ poly_uint64 vsize = tree_to_poly_uint64 (TYPE_SIZE (vectype));
|
|
+ unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits);
|
|
+ tree bool_type = build_nonstandard_boolean_type (esize);
|
|
|
|
- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), size);
|
|
+ return make_vector_type (bool_type, nunits, BLKmode);
|
|
}
|
|
|
|
-/* Similarly, but builds a variant type with TYPE_VECTOR_OPAQUE set. */
|
|
+/* Like build_vector_type, but builds a variant type with TYPE_VECTOR_OPAQUE
|
|
+ set. */
|
|
|
|
tree
|
|
build_opaque_vector_type (tree innertype, poly_int64 nunits)
|
|
@@ -11915,8 +11913,7 @@ truth_type_for (tree type)
|
|
{
|
|
if (VECTOR_BOOLEAN_TYPE_P (type))
|
|
return type;
|
|
- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
|
|
- GET_MODE_SIZE (TYPE_MODE (type)));
|
|
+ return build_truth_vector_type_for (type);
|
|
}
|
|
else
|
|
return boolean_type_node;
|
|
diff --git a/gcc/tree.h b/gcc/tree.h
|
|
index 2f8e37bb356..6f73593faa7 100644
|
|
--- a/gcc/tree.h
|
|
+++ b/gcc/tree.h
|
|
@@ -4272,8 +4272,7 @@ extern tree build_reference_type_for_mode (tree, machine_mode, bool);
|
|
extern tree build_reference_type (tree);
|
|
extern tree build_vector_type_for_mode (tree, machine_mode);
|
|
extern tree build_vector_type (tree, poly_int64);
|
|
-extern tree build_truth_vector_type (poly_uint64, poly_uint64);
|
|
-extern tree build_same_sized_truth_vector_type (tree vectype);
|
|
+extern tree build_truth_vector_type_for_mode (poly_uint64, machine_mode);
|
|
extern tree build_opaque_vector_type (tree, poly_int64);
|
|
extern tree build_index_type (tree);
|
|
extern tree build_array_type (tree, tree, bool = false);
|
|
diff --git a/gcc/vr-values.c b/gcc/vr-values.c
|
|
index 0e10aca92bb..02c89ab030a 100644
|
|
--- a/gcc/vr-values.c
|
|
+++ b/gcc/vr-values.c
|
|
@@ -118,7 +118,10 @@ vr_values::get_value_range (const_tree var)
|
|
if (POINTER_TYPE_P (TREE_TYPE (sym))
|
|
&& (nonnull_arg_p (sym)
|
|
|| get_ptr_nonnull (var)))
|
|
- vr->set_nonnull (TREE_TYPE (sym));
|
|
+ {
|
|
+ vr->set_nonzero (TREE_TYPE (sym));
|
|
+ vr->equiv_clear ();
|
|
+ }
|
|
else if (INTEGRAL_TYPE_P (TREE_TYPE (sym)))
|
|
{
|
|
get_range_info (var, *vr);
|
|
@@ -130,7 +133,10 @@ vr_values::get_value_range (const_tree var)
|
|
}
|
|
else if (TREE_CODE (sym) == RESULT_DECL
|
|
&& DECL_BY_REFERENCE (sym))
|
|
- vr->set_nonnull (TREE_TYPE (sym));
|
|
+ {
|
|
+ vr->set_nonzero (TREE_TYPE (sym));
|
|
+ vr->equiv_clear ();
|
|
+ }
|
|
}
|
|
|
|
return vr;
|
|
@@ -491,9 +497,9 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var,
|
|
vice-versa. Use set_and_canonicalize which does this for
|
|
us. */
|
|
if (cond_code == LE_EXPR)
|
|
- vr_p->set_and_canonicalize (VR_RANGE, min, max, vr_p->equiv ());
|
|
+ vr_p->set (VR_RANGE, min, max, vr_p->equiv ());
|
|
else if (cond_code == GT_EXPR)
|
|
- vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ());
|
|
+ vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ());
|
|
else
|
|
gcc_unreachable ();
|
|
}
|
|
@@ -565,7 +571,7 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var,
|
|
&& vrp_val_is_max (max))
|
|
min = max = limit;
|
|
|
|
- vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ());
|
|
+ vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ());
|
|
}
|
|
else if (cond_code == LE_EXPR || cond_code == LT_EXPR)
|
|
{
|
|
@@ -858,7 +864,10 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
|
|
|| (vr1.kind () == VR_ANTI_RANGE
|
|
&& vr1.min () == op0
|
|
&& vr1.min () == vr1.max ())))
|
|
- vr->set_nonnull (expr_type);
|
|
+ {
|
|
+ vr->set_nonzero (expr_type);
|
|
+ vr->equiv_clear ();
|
|
+ }
|
|
}
|
|
|
|
/* Extract range information from a unary expression CODE OP0 based on
|
|
@@ -1085,7 +1094,8 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt)
|
|
&& TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL
|
|
&& cfun->after_inlining)
|
|
{
|
|
- vr->set_null (type);
|
|
+ vr->set_zero (type);
|
|
+ vr->equiv_clear ();
|
|
return;
|
|
}
|
|
break;
|
|
@@ -1392,7 +1402,10 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt)
|
|
&& gimple_stmt_nonnegative_warnv_p (stmt, &sop))
|
|
set_value_range_to_nonnegative (vr, type);
|
|
else if (vrp_stmt_computes_nonzero (stmt))
|
|
- vr->set_nonnull (type);
|
|
+ {
|
|
+ vr->set_nonzero (type);
|
|
+ vr->equiv_clear ();
|
|
+ }
|
|
else
|
|
vr->set_varying ();
|
|
}
|
|
diff --git a/libgcc/libgcov-driver-system.c b/libgcc/libgcov-driver-system.c
|
|
index b5f3e89ebdc..0d106002098 100644
|
|
--- a/libgcc/libgcov-driver-system.c
|
|
+++ b/libgcc/libgcov-driver-system.c
|
|
@@ -262,10 +262,8 @@ static int
|
|
gcov_exit_open_gcda_file (struct gcov_info *gi_ptr,
|
|
struct gcov_filename *gf)
|
|
{
|
|
- const char *fname = gi_ptr->filename;
|
|
int append_slash = 0;
|
|
-
|
|
- fname = gi_ptr->filename;
|
|
+ const char *fname = gi_ptr->filename;
|
|
|
|
/* Build relocated filename, stripping off leading
|
|
directories from the initial filename if requested. */
|
|
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
|
|
index ae0dd017204..e672768966b 100644
|
|
--- a/libgcc/libgcov-util.c
|
|
+++ b/libgcc/libgcov-util.c
|
|
@@ -461,10 +461,9 @@ gcov_read_profile_dir (const char* dir_name, int recompute_summary ATTRIBUTE_UNU
|
|
#ifdef HAVE_FTW_H
|
|
ftw (".", ftw_read_file, 50);
|
|
#endif
|
|
- ret = chdir (pwd);
|
|
+ chdir (pwd);
|
|
free (pwd);
|
|
|
|
-
|
|
return gcov_info_head;;
|
|
}
|
|
|