AArch64: add a "medium" code model and -mlarge-data-threshold=.

  Summary (everything below is taken from the hunks in this patch):

  * aarch64-opts.h, aarch64.opt, aarch64.h
      Add AARCH64_CMODEL_MEDIUM and AARCH64_CMODEL_MEDIUM_PIC, accept
      "medium" as a cmodel enum value, and add -mlarge-data-threshold=
      (variable aarch64_data_threshold, saved/restored per function,
      default AARCH64_DEFAULT_LARGE_DATA_THRESHOLD = 65536).

  * aarch64-protos.h
      Add SYMBOL_MEDIUM_ABSOLUTE and SYMBOL_MEDIUM_GOT_4G.

  * aarch64.c
      - aarch64_classify_symbol: under AARCH64_CMODEL_MEDIUM a symbol whose
	DECL_SIZE_UNIT is known and >= aarch64_data_threshold jumps to the
	AARCH64_CMODEL_LARGE handling; every other symbol uses the
	AARCH64_CMODEL_SMALL handling.  Under AARCH64_CMODEL_MEDIUM_PIC a
	symbol with a known size below the threshold is classified
	SYMBOL_SMALL_GOT_4G / SYMBOL_SMALL_ABSOLUTE, anything else
	SYMBOL_MEDIUM_GOT_4G / SYMBOL_MEDIUM_ABSOLUTE, in both cases depending
	on aarch64_symbol_binds_local_p.
      - aarch64_load_symref_appropriately: expand the two new symbol types
	through load_symbol_medium_<mode>, get_gotoff_<mode> and
	ldr_got_medium_<mode> / ldr_got_medium_sidi.
      - initialize_aarch64_code_model: AARCH64_CMODEL_MEDIUM becomes
	AARCH64_CMODEL_MEDIUM_PIC in the same switch that rejects the large
	model with -fpic/-fPIC.
      - aarch64_use_pseudo_pic_reg: also true for AARCH64_CMODEL_MEDIUM_PIC.
      - rtx costs, TLS size, EH data format and unspec_may_trap_p are taught
	about the new models / unspec.

  * aarch64.md
      New unspecs and the four new insn patterns used above.

  Notes on this copy of the patch:

  * Line structure was restored from a whitespace-collapsed copy.  Every hunk
    was re-counted against its "@@" header, but leading whitespace (tabs vs
    spaces, continuation-line alignment) is a reconstruction.  If context
    does not match byte-for-byte, apply with "patch -p1 -l".
  * The "<mode>" suffix of the pattern names get_gotoff_<mode>,
    ldr_got_medium_<mode>, load_symbol_medium_<mode> (and of the context
    lines ldr_got_small_28k_<mode>, tlsdesc_small_<mode>) was restored; the
    C hunks call the _di/_si generators of those patterns.
  * NOTE(review): load_symbol_medium_<mode> writes operand 1 (adr/sub) before
    the final add reads operand 0, and both operands are plain "=r".  In
    aarch64_load_symref_appropriately tmp_reg stays equal to dest whenever
    can_create_pseudo_p () is false, so both operands can be the same
    register -- confirm whether an earlyclobber or a distinct scratch is
    needed.
  * NOTE(review): UNSPEC_MOV_MEDIUM_SYMBOL and UNSPEC_GET_LAST_PC are added
    but not referenced by any hunk visible here -- confirm they are used
    elsewhere or drop them.
  * NOTE(review): the help text of -mlarge-data-threshold= may have lost an
    "<...>" argument placeholder after the "="; text kept as found.

diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c	2020-07-16 14:54:30.588000000 +0800
+++ b/gcc/config/aarch64/aarch64.c	2020-07-16 15:06:33.000000000 +0800
@@ -2030,6 +2030,32 @@ aarch64_load_symref_appropriately (rtx d
 	emit_insn (gen_add_losym (dest, tmp_reg, imm));
 	return;
       }
+    case SYMBOL_MEDIUM_ABSOLUTE:
+      {
+	rtx tmp_reg = dest;
+	machine_mode mode = GET_MODE (dest);
+
+	gcc_assert (mode == Pmode || mode == ptr_mode);
+	if (can_create_pseudo_p ())
+	  tmp_reg = gen_reg_rtx (mode);
+
+	if (mode == DImode)
+	  {
+	    emit_insn (
+	      gen_load_symbol_medium_di (dest, tmp_reg, imm));
+	  }
+	else
+	  {
+	    emit_insn (
+	      gen_load_symbol_medium_si (dest, tmp_reg, imm));
+	  }
+	if (REG_P (dest))
+	  {
+	    set_unique_reg_note (
+	      get_last_insn (), REG_EQUIV, copy_rtx (imm));
+	  }
+	return;
+      }
 
     case SYMBOL_TINY_ABSOLUTE:
       emit_insn (gen_rtx_SET (dest, imm));
@@ -2152,6 +2178,64 @@ aarch64_load_symref_appropriately (rtx d
 	return;
       }
 
+    case SYMBOL_MEDIUM_GOT_4G:
+      {
+	rtx tmp_reg = dest;
+	machine_mode mode = GET_MODE (dest);
+	if (can_create_pseudo_p ())
+	  {
+	    tmp_reg = gen_reg_rtx (mode);
+	  }
+	rtx insn;
+	rtx mem;
+	rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+	if (mode == DImode)
+	  {
+	    emit_insn (
+	      gen_load_symbol_medium_di (tmp_reg, dest, s));
+	  }
+	else
+	  {
+	    emit_insn (
+	      gen_load_symbol_medium_si (tmp_reg, dest, s));
+	  }
+	if (REG_P (dest))
+	  {
+	    set_unique_reg_note (
+	      get_last_insn (), REG_EQUIV, copy_rtx (s));
+	  }
+
+	if (mode == ptr_mode)
+	  {
+	    if (mode == DImode)
+	      {
+		emit_insn (gen_get_gotoff_di (dest, imm));
+		insn = gen_ldr_got_medium_di (
+		  dest, tmp_reg, dest);
+	      }
+	    else
+	      {
+		emit_insn (gen_get_gotoff_si (dest, imm));
+		insn = gen_ldr_got_medium_si (
+		  dest, tmp_reg, dest);
+	      }
+	    mem = XVECEXP (SET_SRC (insn), 0, 0);
+	  }
+	else
+	  {
+	    gcc_assert (mode == Pmode);
+	    emit_insn (gen_get_gotoff_di (dest, imm));
+	    insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest);
+	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
+	  }
+
+	gcc_assert (GET_CODE (mem) == MEM);
+	MEM_READONLY_P (mem) = 1;
+	MEM_NOTRAP_P (mem) = 1;
+	emit_insn (insn);
+	return;
+      }
     case SYMBOL_SMALL_TLSGD:
       {
 	rtx_insn *insns;
@@ -3372,11 +3456,12 @@ aarch64_expand_mov_immediate (rtx dest,
 
 	  return;
 
-        case SYMBOL_SMALL_TLSGD:
-        case SYMBOL_SMALL_TLSDESC:
+	case SYMBOL_SMALL_TLSGD:
+	case SYMBOL_SMALL_TLSDESC:
 	case SYMBOL_SMALL_TLSIE:
 	case SYMBOL_SMALL_GOT_28K:
 	case SYMBOL_SMALL_GOT_4G:
+	case SYMBOL_MEDIUM_GOT_4G:
 	case SYMBOL_TINY_GOT:
 	case SYMBOL_TINY_TLSIE:
 	  if (const_offset != 0)
@@ -3395,6 +3480,7 @@ aarch64_expand_mov_immediate (rtx dest,
 	case SYMBOL_TLSLE24:
 	case SYMBOL_TLSLE32:
 	case SYMBOL_TLSLE48:
+	case SYMBOL_MEDIUM_ABSOLUTE:
 	  aarch64_load_symref_appropriately (dest, imm, sty);
 	  return;
 
@@ -10334,6 +10420,13 @@ cost_plus:
 	  if (speed)
 	    *cost += extra_cost->alu.arith;
 	}
+      else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM
+	       || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
+	{
+	  /* 4 movs adr sub add 2movs ldr. */
+	  if (speed)
+	    *cost += 7*extra_cost->alu.arith;
+	}
 
       if (flag_pic)
 	{
@@ -10341,6 +10434,8 @@ cost_plus:
 	  *cost += COSTS_N_INSNS (1);
 	  if (speed)
 	    *cost += extra_cost->ldst.load;
+	  if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC)
+	    *cost += 2*extra_cost->alu.arith;
 	}
       return true;
 
@@ -11395,6 +11490,7 @@ initialize_aarch64_tls_size (struct gcc_
       if (aarch64_tls_size > 32)
 	aarch64_tls_size = 32;
       break;
+    case AARCH64_CMODEL_MEDIUM:
     case AARCH64_CMODEL_LARGE:
       /* The maximum TLS size allowed under large is 16E.
 	 FIXME: 16E should be 64bit, we only support 48bit offset now.  */
@@ -12187,6 +12283,9 @@ initialize_aarch64_code_model (struct gc
 	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
 #endif
 	  break;
+	case AARCH64_CMODEL_MEDIUM:
+	  aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC;
+	  break;
 	case AARCH64_CMODEL_LARGE:
 	  sorry ("code model %qs with %<-f%s%>", "large",
 		 opts->x_flag_pic > 1 ? "PIC" : "pic");
@@ -12205,6 +12304,7 @@ static void
 aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
 {
   ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
+  ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold;
   ptr->x_aarch64_branch_protection_string
     = opts->x_aarch64_branch_protection_string;
 }
@@ -12220,6 +12320,7 @@ aarch64_option_restore (struct gcc_optio
   opts->x_explicit_arch = ptr->x_explicit_arch;
   selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
   opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
+  opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold;
   opts->x_aarch64_branch_protection_string
     = ptr->x_aarch64_branch_protection_string;
   if (opts->x_aarch64_branch_protection_string)
@@ -13067,6 +13168,8 @@ aarch64_classify_symbol (rtx x, HOST_WID
 
 	case AARCH64_CMODEL_SMALL_SPIC:
 	case AARCH64_CMODEL_SMALL_PIC:
+	case AARCH64_CMODEL_MEDIUM_PIC:
+	case AARCH64_CMODEL_MEDIUM:
 	case AARCH64_CMODEL_SMALL:
 	  return SYMBOL_SMALL_ABSOLUTE;
 
@@ -13100,6 +13203,7 @@ aarch64_classify_symbol (rtx x, HOST_WID
 	  return SYMBOL_TINY_ABSOLUTE;
 
 	case AARCH64_CMODEL_SMALL:
+	AARCH64_SMALL_ROUTINE:
 	  /* Same reasoning as the tiny code model, but the offset cap here is
 	     1MB, allowing +/-3.9GB for the offset to the symbol.  */
 
@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID
 		    ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
 	  return SYMBOL_SMALL_ABSOLUTE;
 
+	case AARCH64_CMODEL_MEDIUM:
+	  {
+	    tree decl_local = SYMBOL_REF_DECL (x);
+	    if (decl_local != NULL
+		&& tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
+	      {
+		HOST_WIDE_INT size = tree_to_uhwi (
+		  DECL_SIZE_UNIT (decl_local));
+		/* If the data is smaller than the threshold, goto
+		   the small code model. Else goto the large code
+		   model. */
+		if (size >= HOST_WIDE_INT (aarch64_data_threshold))
+		  goto AARCH64_LARGE_ROUTINE;
+	      }
+	    goto AARCH64_SMALL_ROUTINE;
+	  }
+
+	case AARCH64_CMODEL_MEDIUM_PIC:
+	  {
+	    tree decl_local = SYMBOL_REF_DECL (x);
+	    if (decl_local != NULL
+		&& tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local)))
+	      {
+		HOST_WIDE_INT size = tree_to_uhwi (
+		  DECL_SIZE_UNIT (decl_local));
+		if (size < HOST_WIDE_INT (aarch64_data_threshold))
+		  {
+		    if (!aarch64_symbol_binds_local_p (x))
+		      {
+			return SYMBOL_SMALL_GOT_4G;
+		      }
+		    return SYMBOL_SMALL_ABSOLUTE;
+		  }
+	      }
+	    if (!aarch64_symbol_binds_local_p (x))
+	      {
+		return SYMBOL_MEDIUM_GOT_4G;
+	      }
+	    return SYMBOL_MEDIUM_ABSOLUTE;
+	  }
 	case AARCH64_CMODEL_LARGE:
+	AARCH64_LARGE_ROUTINE:
 	  /* This is alright even in PIC code as the constant
 	     pool reference is always PC relative and within
 	     the same translation unit.  */
@@ -15364,6 +15509,8 @@ aarch64_asm_preferred_eh_data_format (in
      case AARCH64_CMODEL_SMALL:
      case AARCH64_CMODEL_SMALL_PIC:
      case AARCH64_CMODEL_SMALL_SPIC:
+     case AARCH64_CMODEL_MEDIUM:
+     case AARCH64_CMODEL_MEDIUM_PIC:
        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
 	  for everything.  */
        type = DW_EH_PE_sdata4;
@@ -18454,7 +18601,8 @@ aarch64_empty_mask_is_expensive (unsigne
 bool
 aarch64_use_pseudo_pic_reg (void)
 {
-  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
+  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
+	 || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ;
 }
 
 /* Implement TARGET_UNSPEC_MAY_TRAP_P.  */
@@ -18464,6 +18612,7 @@ aarch64_unspec_may_trap_p (const_rtx x,
 {
   switch (XINT (x, 1))
     {
+    case UNSPEC_GOTMEDIUMPIC4G:
     case UNSPEC_GOTSMALLPIC:
     case UNSPEC_GOTSMALLPIC28K:
     case UNSPEC_GOTTINYPIC:
diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h	2020-07-16 14:54:30.592000000 +0800
+++ b/gcc/config/aarch64/aarch64.h	2020-07-16 14:55:05.672000000 +0800
@@ -33,6 +33,10 @@
 
 #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
 
+/* Default threshold 64-bit relocation data
+   with aarch64 medium memory model. */
+#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536
+
 /* Target machine storage layout.  */
 
 #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)	\
diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md	2020-07-16 14:54:30.588000000 +0800
+++ b/gcc/config/aarch64/aarch64.md	2020-07-16 14:55:05.676000000 +0800
@@ -209,6 +209,11 @@
     UNSPEC_RSQRTS
     UNSPEC_NZCV
     UNSPEC_XPACLRI
+    UNSPEC_MOV_MEDIUM_SYMBOL
+    UNSPEC_GET_LAST_PC
+    UNSPEC_GOTMEDIUMPIC4G
+    UNSPEC_GET_GOTOFF
+    UNSPEC_LOAD_SYMBOL_MEDIUM
     UNSPEC_LD1_SVE
     UNSPEC_ST1_SVE
     UNSPEC_LDNT1_SVE
@@ -6548,6 +6553,39 @@
   [(set_attr "type" "load_4")]
 )
 
+(define_insn "get_gotoff_<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")]
+		    UNSPEC_GET_GOTOFF))]
+  ""
+  "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "8")]
+)
+
+(define_insn "ldr_got_medium_<mode>"
+  [(set (match_operand:PTR 0 "register_operand" "=r")
+	(unspec:PTR [(mem:PTR (lo_sum:PTR
+			      (match_operand:PTR 1 "register_operand" "r")
+			      (match_operand:PTR 2 "register_operand" "r")))]
+		    UNSPEC_GOTMEDIUMPIC4G))]
+  ""
+  "ldr\\t%0, [%1, %2]"
+  [(set_attr "type" "load_4")]
+)
+
+(define_insn "ldr_got_medium_sidi"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	 (unspec:SI [(mem:SI (lo_sum:DI
+			     (match_operand:DI 1 "register_operand" "r")
+			     (match_operand:DI 2 "register_operand" "r")))]
+		    UNSPEC_GOTMEDIUMPIC4G)))]
+  "TARGET_ILP32"
+  "ldr\\t%0, [%1, %2]"
+  [(set_attr "type" "load_4")]
+)
+
 (define_insn "ldr_got_small_28k_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
 	(unspec:PTR [(mem:PTR (lo_sum:PTR
@@ -6709,6 +6747,23 @@
    (set_attr "length" "12")]
 )
 
+(define_insn "load_symbol_medium_<mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")]
+		    UNSPEC_LOAD_SYMBOL_MEDIUM))
+   (clobber (match_operand:GPI 1 "register_operand" "=r"))]
+  ""
+  "movz\\t%x0, :prel_g3:%A2\;\\
+   movk\\t%x0, :prel_g2_nc:%A2\;\\
+   movk\\t%x0, :prel_g1_nc:%A2\;\\
+   movk\\t%x0, :prel_g0_nc:%A2\;\\
+   adr\\t%x1, .\;\\
+   sub\\t%x1, %x1, 0x4\;\\
+   add\\t%x0, %x0, %x1"
+  [(set_attr "type" "multiple")
+   (set_attr "length" "28")]
+)
+
 (define_expand "tlsdesc_small_<mode>"
   [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)]
   "TARGET_TLS_DESC"
diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt	2020-07-16 14:54:30.580000000 +0800
+++ b/gcc/config/aarch64/aarch64.opt	2020-07-16 14:55:05.676000000 +0800
@@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor
 TargetVariable
 enum aarch64_arch explicit_arch = aarch64_no_arch
 
+;; -mlarge-data-threshold=
+TargetSave
+int x_aarch64_data_threshold
+
 TargetSave
 const char *x_aarch64_override_tune_string
 
@@ -61,8 +65,15 @@ EnumValue
 Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL)
 
 EnumValue
+Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM)
+
+EnumValue
 Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE)
 
+mlarge-data-threshold=
+Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD)
+-mlarge-data-threshold=	Data greater than given threshold will be assume that it should be relocated using 64-bit relocation.
+
 mbig-endian
 Target Report RejectNegative Mask(BIG_END)
 Assume target CPU is configured as big endian.
diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
--- a/gcc/config/aarch64/aarch64-opts.h	2020-07-16 14:54:30.584000000 +0800
+++ b/gcc/config/aarch64/aarch64-opts.h	2020-07-16 14:55:05.676000000 +0800
@@ -66,6 +66,10 @@ enum aarch64_code_model {
   /* -fpic for small memory model.
      GOT size to 28KiB (4K*8-4K) or 3580 entries.  */
   AARCH64_CMODEL_SMALL_SPIC,
+  /* Using movk insn sequence to do 64bit PC relative relocation. */
+  AARCH64_CMODEL_MEDIUM,
+  /* Using movk insn sequence to do 64bit PC relative got relocation. */
+  AARCH64_CMODEL_MEDIUM_PIC,
   /* No assumptions about addresses of code and data.
      The PIC variant is not yet implemented.  */
   AARCH64_CMODEL_LARGE
diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
--- a/gcc/config/aarch64/aarch64-protos.h	2020-07-16 14:54:30.584000000 +0800
+++ b/gcc/config/aarch64/aarch64-protos.h	2020-07-16 14:55:05.676000000 +0800
@@ -95,9 +95,11 @@
  */
 enum aarch64_symbol_type
 {
+  SYMBOL_MEDIUM_ABSOLUTE,
   SYMBOL_SMALL_ABSOLUTE,
   SYMBOL_SMALL_GOT_28K,
   SYMBOL_SMALL_GOT_4G,
+  SYMBOL_MEDIUM_GOT_4G,
   SYMBOL_SMALL_TLSGD,
   SYMBOL_SMALL_TLSDESC,
   SYMBOL_SMALL_TLSIE,