diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c --- a/gcc/config/aarch64/aarch64.c 2021-02-18 11:03:29.728000000 +0800 +++ b/gcc/config/aarch64/aarch64.c 2021-02-18 14:59:54.432000000 +0800 @@ -2417,6 +2417,29 @@ aarch64_load_symref_appropriately (rtx d emit_insn (gen_add_losym (dest, tmp_reg, imm)); return; } + case SYMBOL_MEDIUM_ABSOLUTE: /* Form the address of IMM with load_symbol_medium; tmp_reg is the scratch that pattern clobbers. */ + { + rtx tmp_reg = dest; /* NOTE(review): when no pseudo can be created tmp_reg stays equal to dest, but the load_symbol_medium template writes operand 0 and then its scratch -- confirm this case cannot occur. */ + machine_mode mode = GET_MODE (dest); + + gcc_assert (mode == Pmode || mode == ptr_mode); + if (can_create_pseudo_p ()) + tmp_reg = gen_reg_rtx (mode); + + if (mode == DImode) + { + emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm)); + } + else + { + emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm)); + } + if (REG_P (dest)) + { + set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm)); + } + return; + } case SYMBOL_TINY_ABSOLUTE: emit_insn (gen_rtx_SET (dest, imm)); @@ -2539,6 +2562,60 @@ aarch64_load_symref_appropriately (rtx d return; } + case SYMBOL_MEDIUM_GOT_4G: /* Put the address of _GLOBAL_OFFSET_TABLE_ in tmp_reg and the GOT offset of IMM in dest, then load the GOT entry addressed by the two registers. */ + { + rtx tmp_reg = dest; /* NOTE(review): tmp_reg aliases dest when no pseudo can be created, yet both values are needed by the final load -- confirm. */ + machine_mode mode = GET_MODE (dest); + if (can_create_pseudo_p ()) + { + tmp_reg = gen_reg_rtx (mode); + } + rtx insn; + rtx mem; + rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + + if (mode == DImode) + { + emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s)); + } + else + { + emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s)); + } + if (REG_P (dest)) + { + set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s)); + } + + if (mode == ptr_mode) + { + if (mode == DImode) + { + emit_insn (gen_get_gotoff_di (dest, imm)); + insn = gen_ldr_got_medium_di (dest, tmp_reg, dest); + } + else + { + emit_insn (gen_get_gotoff_si (dest, imm)); + insn = gen_ldr_got_medium_si (dest, tmp_reg, dest); + } + mem = XVECEXP (SET_SRC (insn), 0, 0); + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_get_gotoff_di (dest, imm)); + insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); + mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); + } + + 
gcc_assert (GET_CODE (mem) == MEM); + MEM_READONLY_P (mem) = 1; + MEM_NOTRAP_P (mem) = 1; + emit_insn (insn); + return; + } + case SYMBOL_SMALL_TLSGD: { rtx_insn *insns; @@ -4531,11 +4608,12 @@ aarch64_expand_mov_immediate (rtx dest, return; - case SYMBOL_SMALL_TLSGD: - case SYMBOL_SMALL_TLSDESC: + case SYMBOL_SMALL_TLSGD: + case SYMBOL_SMALL_TLSDESC: case SYMBOL_SMALL_TLSIE: case SYMBOL_SMALL_GOT_28K: case SYMBOL_SMALL_GOT_4G: + case SYMBOL_MEDIUM_GOT_4G: case SYMBOL_TINY_GOT: case SYMBOL_TINY_TLSIE: if (const_offset != 0) @@ -4554,6 +4632,7 @@ aarch64_expand_mov_immediate (rtx dest, case SYMBOL_TLSLE24: case SYMBOL_TLSLE32: case SYMBOL_TLSLE48: + case SYMBOL_MEDIUM_ABSOLUTE: aarch64_load_symref_appropriately (dest, imm, sty); return; @@ -8450,7 +8529,14 @@ aarch64_classify_address (struct aarch64 split_const (info->offset, &sym, &offs); if (GET_CODE (sym) == SYMBOL_REF && (aarch64_classify_symbol (sym, INTVAL (offs)) - == SYMBOL_SMALL_ABSOLUTE)) + == SYMBOL_SMALL_ABSOLUTE + /* Fixes dbl_mov_immediate_1.c. If we end up here with + MEDIUM_ABSOLUTE, the symbol is a constant number that is + forced to memory in the reload pass, so it is fine to keep + the original design that substitutes the mov with + 'adrp and ldr :losum'. */ + || aarch64_classify_symbol (sym, INTVAL (offs)) + == SYMBOL_MEDIUM_ABSOLUTE)) { /* The symbol and offset must be aligned to the access size. */ unsigned int align; @@ -10365,7 +10451,13 @@ static inline bool aarch64_can_use_per_function_literal_pools_p (void) { return (aarch64_pcrelative_literal_loads - || aarch64_cmodel == AARCH64_CMODEL_LARGE); + || aarch64_cmodel == AARCH64_CMODEL_LARGE + /* Fix const9.C so that constants go to function_literal_pools. + According to the original design of aarch64 mcmodel=medium, we + don't care where this symbol is put. For the benefit of code size + and behaviour consistent with other mcmodels, put it into + function_literal_pools. 
*/ + || aarch64_cmodel == AARCH64_CMODEL_MEDIUM); } static bool @@ -11993,6 +12085,13 @@ cost_plus: if (speed) *cost += extra_cost->alu.arith; } + else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM + || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) + { + /* 4 movs, adr, sub, add; the 2 movs and ldr of PIC are costed below. */ + if (speed) + *cost += 7*extra_cost->alu.arith; + } if (flag_pic) { @@ -12000,6 +12099,8 @@ cost_plus: *cost += COSTS_N_INSNS (1); if (speed) *cost += extra_cost->ldst.load; + if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) + *cost += 2*extra_cost->alu.arith; } return true; @@ -13176,6 +13277,7 @@ initialize_aarch64_tls_size (struct gcc_ if (aarch64_tls_size > 32) aarch64_tls_size = 32; break; + case AARCH64_CMODEL_MEDIUM: case AARCH64_CMODEL_LARGE: /* The maximum TLS size allowed under large is 16E. FIXME: 16E should be 64bit, we only support 48bit offset now. */ @@ -13968,6 +14070,9 @@ initialize_aarch64_code_model (struct gc aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; #endif break; + case AARCH64_CMODEL_MEDIUM: + aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; + break; case AARCH64_CMODEL_LARGE: sorry ("code model %qs with %<-f%s%>", "large", opts->x_flag_pic > 1 ? 
"PIC" : "pic"); @@ -13986,6 +14091,7 @@ static void aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) { ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; + ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold; ptr->x_aarch64_branch_protection_string = opts->x_aarch64_branch_protection_string; } @@ -14001,6 +14107,7 @@ aarch64_option_restore (struct gcc_optio opts->x_explicit_arch = ptr->x_explicit_arch; selected_arch = aarch64_get_arch (ptr->x_explicit_arch); opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; + opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold; opts->x_aarch64_branch_protection_string = ptr->x_aarch64_branch_protection_string; if (opts->x_aarch64_branch_protection_string) @@ -14868,6 +14975,8 @@ aarch64_classify_symbol (rtx x, HOST_WID case AARCH64_CMODEL_SMALL_SPIC: case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_MEDIUM_PIC: + case AARCH64_CMODEL_MEDIUM: case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; @@ -14904,6 +15013,7 @@ aarch64_classify_symbol (rtx x, HOST_WID return SYMBOL_TINY_ABSOLUTE; case AARCH64_CMODEL_SMALL: + AARCH64_SMALL_ROUTINE: /* Same reasoning as the tiny code model, but the offset cap here is 1MB, allowing +/-3.9GB for the offset to the symbol. */ @@ -14927,7 +15037,50 @@ aarch64_classify_symbol (rtx x, HOST_WID ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_MEDIUM: + { + tree decl_local = SYMBOL_REF_DECL (x); + if (decl_local != NULL + && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) + { + HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); + /* If the data is smaller than the threshold, goto + the small code model. Else goto the large code + model. 
*/ + if (size >= HOST_WIDE_INT (aarch64_data_threshold)) + goto AARCH64_LARGE_ROUTINE; + } + goto AARCH64_SMALL_ROUTINE; + } + + case AARCH64_CMODEL_MEDIUM_PIC: + { + tree decl_local = SYMBOL_REF_DECL (x); + if (decl_local != NULL + && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) + { + HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); + if (size < HOST_WIDE_INT (aarch64_data_threshold)) + { + if (!aarch64_symbol_binds_local_p (x)) + { + /* flag_pic is 2 only when -fPIC is on, when we should + use 4G GOT. */ + return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G + : SYMBOL_SMALL_GOT_28K ; + } + return SYMBOL_SMALL_ABSOLUTE; + } + } + if (!aarch64_symbol_binds_local_p (x)) + { + return SYMBOL_MEDIUM_GOT_4G; + } + return SYMBOL_MEDIUM_ABSOLUTE; + } + case AARCH64_CMODEL_LARGE: + AARCH64_LARGE_ROUTINE: /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ @@ -17789,6 +17942,8 @@ aarch64_asm_preferred_eh_data_format (in case AARCH64_CMODEL_SMALL: case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL_SPIC: + case AARCH64_CMODEL_MEDIUM: + case AARCH64_CMODEL_MEDIUM_PIC: /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */ type = DW_EH_PE_sdata4; @@ -21014,7 +21169,14 @@ aarch64_empty_mask_is_expensive (unsigne bool aarch64_use_pseudo_pic_reg (void) { - return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; + /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo + pic reg. In medium code mode, when combined with -fpie/-fpic, there is a + possibility that some symbols smaller than the -mlarge-data-threshold + will still use SMALL_SPIC relocation, which needs the pseudo pic reg. + Fixes the spill_1.c failure. */ + return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC + || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC + && flag_pic != 2); } /* Implement TARGET_UNSPEC_MAY_TRAP_P. 
*/ @@ -21024,6 +21186,7 @@ aarch64_unspec_may_trap_p (const_rtx x, { switch (XINT (x, 1)) { + case UNSPEC_GOTMEDIUMPIC4G: case UNSPEC_GOTSMALLPIC: case UNSPEC_GOTSMALLPIC28K: case UNSPEC_GOTTINYPIC: diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h --- a/gcc/config/aarch64/aarch64.h 2021-02-18 11:03:28.336000000 +0800 +++ b/gcc/config/aarch64/aarch64.h 2021-02-18 10:57:45.488000000 +0800 @@ -33,6 +33,10 @@ #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () +/* Default data size threshold for 64-bit relocations + under the aarch64 medium code model. */ +#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536 + /* Target machine storage layout. */ #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md --- a/gcc/config/aarch64/aarch64.md 2021-02-18 11:03:28.340000000 +0800 +++ b/gcc/config/aarch64/aarch64.md 2021-02-18 10:57:45.488000000 +0800 @@ -224,6 +224,11 @@ UNSPEC_RSQRTS UNSPEC_NZCV UNSPEC_XPACLRI + UNSPEC_MOV_MEDIUM_SYMBOL + UNSPEC_GET_LAST_PC + UNSPEC_GOTMEDIUMPIC4G + UNSPEC_GET_GOTOFF + UNSPEC_LOAD_SYMBOL_MEDIUM UNSPEC_LD1_SVE UNSPEC_ST1_SVE UNSPEC_LDNT1_SVE @@ -6689,6 +6694,39 @@ [(set_attr "type" "load_4")] ) +(define_insn "get_gotoff_<mode>" ;; Build the GOT offset of operand 1 with a movz/movk pair. + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")] + UNSPEC_GET_GOTOFF))] + "" + "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1" + [(set_attr "type" "multiple") + (set_attr "length" "8")] +) + +(define_insn "ldr_got_medium_<mode>" ;; Load the GOT entry at base register 1 plus offset register 2; the destination width follows the mode. + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(mem:PTR (lo_sum:PTR + (match_operand:PTR 1 "register_operand" "r") + (match_operand:PTR 2 "register_operand" "r")))] + UNSPEC_GOTMEDIUMPIC4G))] + "" + "ldr\\t%<w>0, [%1, %2]" + [(set_attr "type" "load_4")] +) + +(define_insn "ldr_got_medium_sidi" ;; ILP32 variant: load the SImode GOT entry into a W register, zero-extending it to DImode. + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unspec:SI [(mem:SI (lo_sum:DI + (match_operand:DI 1 
"register_operand" "r") + (match_operand:DI 2 "register_operand" "r")))] + UNSPEC_GOTMEDIUMPIC4G)))] + "TARGET_ILP32" + "ldr\\t%w0, [%1, %2]" + [(set_attr "type" "load_4")] +) + (define_insn "ldr_got_small_28k_<mode>" [(set (match_operand:PTR 0 "register_operand" "=r") (unspec:PTR [(mem:PTR (lo_sum:PTR @@ -6852,6 +6890,23 @@ (set_attr "length" "12")] ) +(define_insn "load_symbol_medium_<mode>" ;; Form the address of operand 2 from a movz/movk PC-relative offset plus the PC read with adr; operand 1 is a scratch that must differ from operand 0. + [(set (match_operand:GPI 0 "register_operand" "=r") + (unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")] + UNSPEC_LOAD_SYMBOL_MEDIUM)) + (clobber (match_operand:GPI 1 "register_operand" "=r"))] + "" + "movz\\t%x0, :prel_g3:%A2\;\\ + movk\\t%x0, :prel_g2_nc:%A2\;\\ + movk\\t%x0, :prel_g1_nc:%A2\;\\ + movk\\t%x0, :prel_g0_nc:%A2\;\\ + adr\\t%x1, .\;\\ + sub\\t%x1, %x1, 0x4\;\\ + add\\t%x0, %x0, %x1" + [(set_attr "type" "multiple") + (set_attr "length" "28")] +) + (define_expand "tlsdesc_small_<mode>" [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] "TARGET_TLS_DESC" diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt --- a/gcc/config/aarch64/aarch64.opt 2021-02-18 11:03:28.340000000 +0800 +++ b/gcc/config/aarch64/aarch64.opt 2021-02-18 10:57:45.488000000 +0800 @@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor TargetVariable enum aarch64_arch explicit_arch = aarch64_no_arch +;; -mlarge-data-threshold= +TargetSave +int x_aarch64_data_threshold + TargetSave const char *x_aarch64_override_tune_string @@ -61,8 +65,15 @@ EnumValue Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL) EnumValue +Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM) + +EnumValue Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) +mlarge-data-threshold= +Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD) +-mlarge-data-threshold= Data whose size is at least the given threshold is assumed to need 64-bit relocation. 
+ mbig-endian Target Report RejectNegative Mask(BIG_END) Assume target CPU is configured as big endian. diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h --- a/gcc/config/aarch64/aarch64-opts.h 2020-03-12 19:07:21.000000000 +0800 +++ b/gcc/config/aarch64/aarch64-opts.h 2021-02-18 10:57:45.488000000 +0800 @@ -66,6 +66,10 @@ enum aarch64_code_model { /* -fpic for small memory model. GOT size to 28KiB (4K*8-4K) or 3580 entries. */ AARCH64_CMODEL_SMALL_SPIC, + /* Using movk insn sequence to do 64bit PC relative relocation. */ + AARCH64_CMODEL_MEDIUM, + /* Using movk insn sequence to do 64bit PC relative got relocation. */ + AARCH64_CMODEL_MEDIUM_PIC, /* No assumptions about addresses of code and data. The PIC variant is not yet implemented. */ AARCH64_CMODEL_LARGE diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h --- a/gcc/config/aarch64/aarch64-protos.h 2021-02-18 11:03:29.432000000 +0800 +++ b/gcc/config/aarch64/aarch64-protos.h 2021-02-18 10:57:45.488000000 +0800 @@ -95,9 +95,11 @@ */ enum aarch64_symbol_type { + SYMBOL_MEDIUM_ABSOLUTE, SYMBOL_SMALL_ABSOLUTE, SYMBOL_SMALL_GOT_28K, SYMBOL_SMALL_GOT_4G, + SYMBOL_MEDIUM_GOT_4G, SYMBOL_SMALL_TLSGD, SYMBOL_SMALL_TLSDESC, SYMBOL_SMALL_TLSIE,