diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h --- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800 +++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800 @@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void); const char *aarch64_mangle_builtin_type (const_tree); const char *aarch64_output_casesi (rtx *); +extern void aarch64_pr_long_calls (struct cpp_reader *); +extern void aarch64_pr_no_long_calls (struct cpp_reader *); +extern void aarch64_pr_long_calls_off (struct cpp_reader *); + enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx); enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); enum reg_class aarch64_regno_regclass (unsigned); @@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool); void aarch64_expand_mov_immediate (rtx, rtx); void aarch64_expand_prologue (void); void aarch64_expand_vector_init (rtx, rtx); +void aarch64_function_profiler (FILE *, int); void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, const_tree, unsigned); void aarch64_init_expanders (void); diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c --- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800 +++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800 @@ -70,6 +70,9 @@ /* This file should be included last. */ #include "target-def.h" +static void aarch64_set_default_type_attributes (tree); +static int aarch64_comp_type_attributes (const_tree, const_tree); + /* Defined for convenience. */ #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) @@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns return choose_hard_reg_mode (regno, nregs, false); } +/* Table of machine attributes: the long_call and short_call function-type attributes. */ +static const struct attribute_spec aarch64_attribute_table[] = +{ + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, + affects_type_identity }. 
*/ + /* Function calls made to this symbol must be done indirectly, because + it may lie outside of the 26-bit addressing range of a normal function + call. */ + { "long_call", 0, 0, false, true, true, NULL, false }, + /* Whereas these functions are always known to reside within the 26-bit + addressing range. */ + { "short_call", 0, 0, false, true, true, NULL, false }, + { NULL, 0, 0, false, false, false, NULL, false } +}; + +/* Encode the current state of the #pragma [no_]long_calls. */ +typedef enum +{ + OFF, /* No #pragma [no_]long_calls is in effect; also selected by #pragma long_calls_off. */ + LONG, /* #pragma long_calls is in effect. */ + SHORT /* #pragma no_long_calls is in effect. */ +} aarch64_pragma_enum; + +static aarch64_pragma_enum aarch64_pragma_long_calls = OFF; + +void +aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + aarch64_pragma_long_calls = LONG; +} + +void +aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + aarch64_pragma_long_calls = SHORT; +} + +void +aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) +{ + aarch64_pragma_long_calls = OFF; +} + +/* Return 0 if the long_call/short_call attributes of TYPE1 and TYPE2 are incompatible, 1 if they + are compatible. Always returns 1 when TYPE1 is not a FUNCTION_TYPE. */ +static int +aarch64_comp_type_attributes (const_tree type1, const_tree type2) +{ + int l1, l2, s1, s2; + + /* Call attributes are only compared for FUNCTION_TYPE; any other type code is compatible. NOTE(review): METHOD_TYPE is not compared here although aarch64_set_default_type_attributes also tags it -- confirm this is intended. */ + if (TREE_CODE (type1) != FUNCTION_TYPE) + return 1; + + /* Record which of the call attributes each type carries. */ + l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; + l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; + s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; + s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; + + /* Only bother to check if an attribute is defined. */ + if (l1 | l2 | s1 | s2) + { + /* If one type has an attribute, the other + must have the same attribute. 
*/ + if ((l1 != l2) || (s1 != s2)) + { + return 0; + } + + /* Disallow mixed attributes; after the equality check above this means a type carrying both long_call and short_call. */ + if ((l1 && s2) || (l2 && s1)) + { + return 0; + } + } + + return 1; +} + +/* Assign default attributes to a newly defined TYPE. This is used to + set short_call/long_call attributes on the function and method types of + functions defined inside the corresponding #pragma scopes. */ +static void +aarch64_set_default_type_attributes (tree type) +{ + /* Add __attribute__ ((long_call)) to all functions when + inside #pragma long_calls, or __attribute__ ((short_call)) + when inside #pragma no_long_calls; leave TYPE untouched otherwise. */ + if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) + { + tree type_attr_list = NULL; + tree attr_name = NULL; + type_attr_list = TYPE_ATTRIBUTES (type); + + if (aarch64_pragma_long_calls == LONG) + { + attr_name = get_identifier ("long_call"); + } + else if (aarch64_pragma_long_calls == SHORT) + { + attr_name = get_identifier ("short_call"); + } + else + { + return; + } + + type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); + TYPE_ATTRIBUTES (type) = type_attr_list; + } +} + +/* Return true if DECL is known to be linked into section SECTION. */ +static bool +aarch64_function_in_section_p (tree decl, section *section) +{ + /* We can only be certain about the prevailing symbol definition. */ + if (!decl_binds_to_current_def_p (decl)) + return false; + + /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ + if (!DECL_SECTION_NAME (decl)) + { + /* Make sure that we will not create a unique section for DECL. */ + if (flag_function_sections || DECL_COMDAT_GROUP (decl)) + return false; + } + + return function_section (decl) == section; +} + /* Return true if calls to DECL should be treated as long-calls (ie called via a register). 
*/ static bool -aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) +aarch64_decl_is_long_call_p (tree decl) { - return false; + tree attrs = NULL; + + if (!decl) + return TARGET_LONG_CALLS; + + attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); + if (lookup_attribute ("short_call", attrs)) + return false; + + /* For a callee in the current function's section, be conservative, and only cater for cases in which the + whole of the current function is placed in the same section. This takes precedence over a long_call attribute. */ + if (!flag_reorder_blocks_and_partition + && TREE_CODE (decl) == FUNCTION_DECL + && aarch64_function_in_section_p (decl, current_function_section ())) + return false; + + if (lookup_attribute ("long_call", attrs)) + return true; + + return TARGET_LONG_CALLS; } /* Return true if calls to symbol-ref SYM should be treated as @@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym) return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); } +void +aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) +{ + if (!TARGET_LONG_CALLS) + { + fprintf (file, "\tmov\tx9, x30\n"); + fprintf (file, "\tbl\t__fentry__\n"); + fprintf (file, "\tmov\tx30, x9\n"); + } + else + { + if (flag_pic) + { + fprintf (file, "\tmov\tx9, x30\n"); + fprintf (file, "\tadrp\tx10, :got:__fentry__\n"); + fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n"); + fprintf (file, "\tblr\tx10\n"); + fprintf (file, "\tmov\tx30, x9\n"); + } + else + { + fprintf (file, "\tmov\tx9, x30\n"); + fprintf (file, "\tadrp\tx10, __fentry__\n"); + fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n"); + fprintf (file, "\tblr\tx10\n"); + fprintf (file, "\tmov\tx30, x9\n"); + } + } +} + /* Return true if calls to symbol-ref SYM should not go through plt stubs. 
*/ @@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SCHED_CAN_SPECULATE_INSN #define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn +#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES +#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes + +#undef TARGET_ATTRIBUTE_TABLE +#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table + +#undef TARGET_COMP_TYPE_ATTRIBUTES +#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes + #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h --- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800 +++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800 @@ -28,7 +28,6 @@ -#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () /* Target machine storage layout. */ @@ -659,6 +658,14 @@ typedef struct } CUMULATIVE_ARGS; #endif +/* Register the long_calls, no_long_calls and long_calls_off pragmas, then the other AArch64 pragmas via aarch64_register_pragmas. */ +#define REGISTER_TARGET_PRAGMAS() do { \ + c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \ + c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \ + c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \ + aarch64_register_pragmas (); \ +} while (0) + #define FUNCTION_ARG_PADDING(MODE, TYPE) \ (aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward) @@ -842,13 +849,20 @@ typedef struct #define PROFILE_HOOK(LABEL) \ { \ rtx fun, lr; \ + const rtx_insn* tmp = get_insns (); \ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ + if (TARGET_LONG_CALLS) \ + { \ + emit_insn (gen_blockage ()); \ + emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \ + } \ } /* All the work done in PROFILE_HOOK, but still required. 
*/ -#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) +#define FUNCTION_PROFILER(STREAM, LABELNO) \ + aarch64_function_profiler (STREAM, LABELNO) /* For some reason, the Linux headers think they know how to define these macros. They don't!!! */ diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md --- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800 +++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800 @@ -850,9 +850,10 @@ { rtx pat; rtx callee = XEXP (operands[0], 0); - if (!REG_P (callee) - && ((GET_CODE (callee) != SYMBOL_REF) - || aarch64_is_noplt_call_p (callee))) + + if (GET_CODE (callee) == SYMBOL_REF + ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee)) + : !REG_P (callee)) XEXP (operands[0], 0) = force_reg (Pmode, callee); if (operands[2] == NULL_RTX) @@ -881,9 +882,10 @@ { rtx pat; rtx callee = XEXP (operands[1], 0); - if (!REG_P (callee) - && ((GET_CODE (callee) != SYMBOL_REF) - || aarch64_is_noplt_call_p (callee))) + + if (GET_CODE (callee) == SYMBOL_REF + ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee)) + : !REG_P (callee)) XEXP (operands[1], 0) = force_reg (Pmode, callee); if (operands[3] == NULL_RTX) diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt --- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800 +++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800 @@ -80,6 +80,10 @@ mlittle-endian Target Report RejectNegative InverseMask(BIG_END) Assume target CPU is configured as little endian. +mlong-calls +Target Report Mask(LONG_CALLS) +Generate call insns as indirect calls, if necessary. + mcmodel= Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save Specify the code model. 
diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md --- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800 +++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800 @@ -27,8 +27,9 @@ ) (define_predicate "aarch64_call_insn_operand" - (ior (match_code "symbol_ref") - (match_operand 0 "register_operand"))) + (ior (and (match_code "symbol_ref") + (match_test "!aarch64_is_long_call_p (op)")) + (match_operand 0 "register_operand"))) ;; Return true if OP a (const_int 0) operand. (define_predicate "const0_operand"