diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c	2018-09-19 17:11:42.583520820 +0800
+++ b/gcc/config/aarch64/aarch64.c	2018-09-19 17:10:22.715520820 +0800
@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
 void
 aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 {
-  if (!TARGET_LONG_CALLS)
+  if (flag_fentry)
     {
-      fprintf (file, "\tmov\tx9, x30\n");
-      fprintf (file, "\tbl\t__fentry__\n");
-      fprintf (file, "\tmov\tx30, x9\n");
-    }
-  else
-    {
-      if (flag_pic)
+      if (!TARGET_LONG_CALLS)
        {
          fprintf (file, "\tmov\tx9, x30\n");
-         fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
-         fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
-         fprintf (file, "\tblr\tx10\n");
+         fprintf (file, "\tbl\t__fentry__\n");
          fprintf (file, "\tmov\tx30, x9\n");
        }
      else
        {
-         fprintf (file, "\tmov\tx9, x30\n");
-         fprintf (file, "\tadrp\tx10, __fentry__\n");
-         fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
-         fprintf (file, "\tblr\tx10\n");
-         fprintf (file, "\tmov\tx30, x9\n");
+         if (flag_pic)
+           {
+             fprintf (file, "\tmov\tx9, x30\n");
+             fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+             fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+             fprintf (file, "\tblr\tx10\n");
+             fprintf (file, "\tmov\tx30, x9\n");
+           }
+         else
+           {
+             fprintf (file, "\tmov\tx9, x30\n");
+             fprintf (file, "\tadrp\tx10, __fentry__\n");
+             fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+             fprintf (file, "\tblr\tx10\n");
+             fprintf (file, "\tmov\tx30, x9\n");
+           }
        }
    }
 }
@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
 }
 
+/* Return true if profiling code should be emitted before the
+   prologue, false otherwise.  Note: on x86 the conflicting
+   hot-patch case (ms_hook_prologue) is rejected with sorry ().  */
+static bool
+aarch64_profile_before_prologue (void)
+{
+  return flag_fentry != 0;
+}
+
 /* Expand a compare and swap pattern.  */
 
 void
@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
+
 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
   hook_bool_const_tree_hwi_hwi_const_tree_true
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h	2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.h	2018-09-19 17:10:22.715520820 +0800
@@ -850,9 +850,12 @@ typedef struct
   {                                                             \
     rtx fun, lr;                                                \
     const rtx_insn* tmp = get_insns ();                         \
-    lr = get_hard_reg_initial_val (Pmode, LR_REGNUM);           \
-    fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME);              \
-    emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+    if (!flag_fentry)                                           \
+      {                                                         \
+       lr = get_hard_reg_initial_val (Pmode, LR_REGNUM);        \
+       fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME);           \
+       emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+      }                                                         \
     if (TARGET_LONG_CALLS)                                      \
      {                                                          \
        emit_insn (gen_blockage ());                             \
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt	2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.opt	2018-09-19 17:10:22.715520820 +0800
@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
 mverbose-cost-dump
 Common Undocumented Var(flag_aarch64_verbose_cost)
 Enables verbose cost model dumping in the debug dump files.
+
+mfentry
+Target Report Var(flag_fentry) Init(0)
+Emit profiling counter call at function entry before prologue.
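
With this patch, -pg -mfentry emits the profiler call ahead of the
function prologue (TARGET_PROFILE_BEFORE_PROLOGUE returns true), while
the PROFILE_HOOK change keeps the generic mcount library call from also
being emitted.  As a rough sketch derived from the fprintf sequences
above (default configuration: no -mlong-calls, no -fpic), compiling
e.g. "gcc -pg -mfentry -O2 -S foo.c" should start each instrumented
function like this:

    foo:
            mov     x9, x30         // stash foo's return address
            bl      __fentry__      // x30 now points back into foo
            mov     x30, x9         // restore foo's return address
            ...                     // prologue and body follow

Under -mlong-calls the same call goes through x10 instead, materialized
with adrp/add, or with an adrp/ldr GOT load when -fpic is in effect, as
in the second branch of aarch64_function_profiler above.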
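
For smoke-testing, a no-op __fentry__ suffices; the stub below is an
illustration only, not part of the patch.  On entry, x30 holds the call
site inside the instrumented function and x9 holds that function's
original return address, so a plain ret resumes the instrumented
function, which then restores x30 from x9 itself:

            .text
            .globl  __fentry__
            .type   __fentry__, %function
    __fentry__:
            // x30 = return address back into the instrumented function
            // x9  = the instrumented function's own return address
            ret
            .size   __fentry__, . - __fentry__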