From adc26b4eaedc50f1b99d5af5c7e248966fced660 Mon Sep 17 00:00:00 2001 From: Zhiyao Ma Date: Thu, 10 Mar 2022 15:13:41 -0800 Subject: [ARM] Fix 8-bit immediate overflow in the instruction of segmented stack prologue. It fixes the overflow of the 8-bit immediate field in the emitted instruction that allocates a large stacklet. For thumb2 targets, load the large immediate with a pair of movw and movt instructions. For thumb1 and ARM targets, load the large immediate by reading it from the literal pool. Differential Revision: https://reviews.llvm.org/D118545 --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 123 +++++++++++++++++++++------ llvm/test/CodeGen/ARM/segmented-stacks.ll | 56 +++++++----- llvm/test/CodeGen/Thumb/segmented-stacks.ll | 29 +++++-- llvm/test/CodeGen/Thumb2/segmented-stacks.ll | 104 ++++++++++++++++++++++ 4 files changed, 258 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 1f2f6f7497e0..b9b417865691 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -2488,6 +2488,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( unsigned CFIIndex; const ARMSubtarget *ST = &MF.getSubtarget(); bool Thumb = ST->isThumb(); + bool Thumb2 = ST->isThumb2(); // Sadly, this currently doesn't support varargs, platforms other than // android/linux. Note that thumb1/thumb2 are support for android/linux. 
@@ -2630,17 +2631,46 @@ void ARMFrameLowering::adjustForSegmentedStacks( // sub SR1, sp, #StackSize if (!CompareStackPointer && Thumb) { - BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) - .add(condCodeOp()) - .addReg(ScratchReg1) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(McrMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1) + .add(condCodeOp()) + .addReg(ScratchReg1) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)); + } } else if (!CompareStackPointer) { - BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) - .addReg(ARM::SP) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1) + .addReg(ARM::SP) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = McrMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1) + .addReg(ARM::SP) + .addReg(ScratchReg0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } } if (Thumb && ST->isThumb1Only()) { @@ -2707,28 +2737,69 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Pass first argument for the __morestack by Scratch Register #0. 
// The amount size of stack required if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) - .add(condCodeOp()) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0) + .add(condCodeOp()) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg0) + .addImm(AlignedStackSize); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) - .addImm(AlignedStackSize) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (AlignedStackSize < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0) + .addImm(AlignedStackSize) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0, + AlignedStackSize); + } } + // Pass second argument for the __morestack by Scratch Register #1. // The amount size of stack consumed to save function arguments. 
if (Thumb) { - BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) - .add(condCodeOp()) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)); + if (ARMFI->getArgumentStackSize() < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1) + .add(condCodeOp()) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)); + } else { + if (Thumb2) { + BuildMI(AllocMBB, DL, TII.get(ARM::t2MOVi32imm), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } + } } else { - BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) - .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); + if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) { + BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1) + .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } else { + auto MBBI = AllocMBB->end(); + auto RegInfo = STI.getRegisterInfo(); + RegInfo->emitLoadConstPool( + *AllocMBB, MBBI, DL, ScratchReg1, 0, + alignToARMConstant(ARMFI->getArgumentStackSize())); + } } // push {lr} - Save return address of this function. 
diff --git a/llvm/test/CodeGen/ARM/segmented-stacks.ll b/llvm/test/CodeGen/ARM/segmented-stacks.ll index 971fdd966065..10ce0657e573 100644 --- a/llvm/test/CodeGen/ARM/segmented-stacks.ll +++ b/llvm/test/CodeGen/ARM/segmented-stacks.ll @@ -14,7 +14,7 @@ define void @test_basic() #0 { call void @dummy_use (i32* %mem, i32 10) ret void -; ARM-linux: test_basic: +; ARM-linux-LABEL: test_basic: ; ARM-linux: push {r4, r5} ; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -33,7 +33,7 @@ define void @test_basic() #0 { ; ARM-linux: pop {r4, r5} -; ARM-android: test_basic: +; ARM-android-LABEL: test_basic: ; ARM-android: push {r4, r5} ; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -61,7 +61,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { call void @dummy_use (i32* %mem, i32 10) ret i32 %result -; ARM-linux: test_nested: +; ARM-linux-LABEL: test_nested: ; ARM-linux: push {r4, r5} ; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -80,7 +80,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 { ; ARM-linux: pop {r4, r5} -; ARM-android: test_nested: +; ARM-android-LABEL: test_nested: ; ARM-android: push {r4, r5} ; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -106,16 +106,17 @@ define void @test_large() #0 { call void @dummy_use (i32* %mem, i32 0) ret void -; ARM-linux: test_large: +; ARM-linux-LABEL: test_large: ; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: ldr r4, .LCPI2_0 +; ARM-linux-NEXT: sub r5, sp, r4 ; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 -; ARM-linux-NEXT: sub r5, sp, #40192 ; ARM-linux-NEXT: ldr r4, [r4, #4] ; ARM-linux-NEXT: cmp r4, r5 ; ARM-linux-NEXT: blo .LBB2_2 -; ARM-linux: mov r4, #40192 +; ARM-linux: ldr r4, .LCPI2_0 ; ARM-linux-NEXT: mov r5, #0 ; ARM-linux-NEXT: stmdb sp!, {lr} ; ARM-linux-NEXT: bl __morestack @@ -125,16 +126,20 @@ define void @test_large() #0 { ; ARM-linux: pop {r4, r5} -; ARM-android: test_large: +; ARM-linux: .LCPI2_0: +; ARM-linux-NEXT: .long 40192 + +; ARM-android-LABEL: test_large: ; 
ARM-android: push {r4, r5} +; ARM-android-NEXT: ldr r4, .LCPI2_0 +; ARM-android-NEXT: sub r5, sp, r4 ; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 -; ARM-android-NEXT: sub r5, sp, #40192 ; ARM-android-NEXT: ldr r4, [r4, #252] ; ARM-android-NEXT: cmp r4, r5 ; ARM-android-NEXT: blo .LBB2_2 -; ARM-android: mov r4, #40192 +; ARM-android: ldr r4, .LCPI2_0 ; ARM-android-NEXT: mov r5, #0 ; ARM-android-NEXT: stmdb sp!, {lr} ; ARM-android-NEXT: bl __morestack @@ -144,6 +149,9 @@ define void @test_large() #0 { ; ARM-android: pop {r4, r5} +; ARM-android: .LCPI2_0: +; ARM-android-NEXT: .long 40192 + } define fastcc void @test_fastcc() #0 { @@ -151,7 +159,7 @@ define fastcc void @test_fastcc() #0 { call void @dummy_use (i32* %mem, i32 10) ret void -; ARM-linux: test_fastcc: +; ARM-linux-LABEL: test_fastcc: ; ARM-linux: push {r4, r5} ; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -170,7 +178,7 @@ define fastcc void @test_fastcc() #0 { ; ARM-linux: pop {r4, r5} -; ARM-android: test_fastcc: +; ARM-android-LABEL: test_fastcc: ; ARM-android: push {r4, r5} ; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 @@ -196,16 +204,17 @@ define fastcc void @test_fastcc_large() #0 { call void @dummy_use (i32* %mem, i32 0) ret void -; ARM-linux: test_fastcc_large: +; ARM-linux-LABEL: test_fastcc_large: ; ARM-linux: push {r4, r5} +; ARM-linux-NEXT: ldr r4, .LCPI4_0 +; ARM-linux-NEXT: sub r5, sp, r4 ; ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3 -; ARM-linux-NEXT: sub r5, sp, #40192 ; ARM-linux-NEXT: ldr r4, [r4, #4] ; ARM-linux-NEXT: cmp r4, r5 ; ARM-linux-NEXT: blo .LBB4_2 -; ARM-linux: mov r4, #40192 +; ARM-linux: ldr r4, .LCPI4_0 ; ARM-linux-NEXT: mov r5, #0 ; ARM-linux-NEXT: stmdb sp!, {lr} ; ARM-linux-NEXT: bl __morestack @@ -215,16 +224,20 @@ define fastcc void @test_fastcc_large() #0 { ; ARM-linux: pop {r4, r5} -; ARM-android: test_fastcc_large: +; ARM-linux: .LCPI4_0: +; ARM-linux-NEXT: .long 40192 + +; ARM-android-LABEL: test_fastcc_large: ; ARM-android: push {r4, r5} +; 
ARM-android-NEXT: ldr r4, .LCPI4_0 +; ARM-android-NEXT: sub r5, sp, r4 ; ARM-android-NEXT: mrc p15, #0, r4, c13, c0, #3 -; ARM-android-NEXT: sub r5, sp, #40192 ; ARM-android-NEXT: ldr r4, [r4, #252] ; ARM-android-NEXT: cmp r4, r5 ; ARM-android-NEXT: blo .LBB4_2 -; ARM-android: mov r4, #40192 +; ARM-android: ldr r4, .LCPI4_0 ; ARM-android-NEXT: mov r5, #0 ; ARM-android-NEXT: stmdb sp!, {lr} ; ARM-android-NEXT: bl __morestack @@ -234,6 +247,9 @@ define fastcc void @test_fastcc_large() #0 { ; ARM-android: pop {r4, r5} +; ARM-android: .LCPI4_0: +; ARM-android-NEXT: .long 40192 + } define void @test_nostack() #0 { @@ -256,10 +272,10 @@ define i32 @test_sibling_call_empty_frame(i32 %x) #0 { %call = tail call i32 @callee(i32 %x) #0 ret i32 %call -; ARM-linux: test_sibling_call_empty_frame: +; ARM-linux-LABEL: test_sibling_call_empty_frame: ; ARM-linux: bl __morestack -; ARM-android: test_sibling_call_empty_frame: +; ARM-android-LABEL: test_sibling_call_empty_frame: ; ARM-android: bl __morestack } diff --git a/llvm/test/CodeGen/Thumb/segmented-stacks.ll b/llvm/test/CodeGen/Thumb/segmented-stacks.ll index 7340842a42f4..6ad8cf9f20d2 100644 --- a/llvm/test/CodeGen/Thumb/segmented-stacks.ll +++ b/llvm/test/CodeGen/Thumb/segmented-stacks.ll @@ -116,13 +116,14 @@ define void @test_large() #0 { ; Thumb-android: push {r4, r5} ; Thumb-android-NEXT: mov r5, sp -; Thumb-android-NEXT: sub r5, #40192 ; Thumb-android-NEXT: ldr r4, .LCPI2_2 +; Thumb-android-NEXT: sub r5, r5, r4 +; Thumb-android-NEXT: ldr r4, .LCPI2_3 ; Thumb-android-NEXT: ldr r4, [r4] ; Thumb-android-NEXT: cmp r4, r5 ; Thumb-android-NEXT: blo .LBB2_2 -; Thumb-android: mov r4, #40192 +; Thumb-android: ldr r4, .LCPI2_2 ; Thumb-android-NEXT: mov r5, #0 ; Thumb-android-NEXT: push {lr} ; Thumb-android-NEXT: bl __morestack @@ -133,17 +134,21 @@ define void @test_large() #0 { ; Thumb-android: pop {r4, r5} +; Thumb-android: .LCPI2_2: +; Thumb-android-NEXT: .long 40192 + ; Thumb-linux-LABEL: test_large: ; Thumb-linux: push {r4, 
r5} ; Thumb-linux-NEXT: mov r5, sp -; Thumb-linux-NEXT: sub r5, #40192 ; Thumb-linux-NEXT: ldr r4, .LCPI2_2 +; Thumb-linux-NEXT: sub r5, r5, r4 +; Thumb-linux-NEXT: ldr r4, .LCPI2_3 ; Thumb-linux-NEXT: ldr r4, [r4] ; Thumb-linux-NEXT: cmp r4, r5 ; Thumb-linux-NEXT: blo .LBB2_2 -; Thumb-linux: mov r4, #40192 +; Thumb-linux: ldr r4, .LCPI2_2 ; Thumb-linux-NEXT: mov r5, #0 ; Thumb-linux-NEXT: push {lr} ; Thumb-linux-NEXT: bl __morestack @@ -212,13 +217,14 @@ define fastcc void @test_fastcc_large() #0 { ; Thumb-android: push {r4, r5} ; Thumb-android-NEXT: mov r5, sp -; Thumb-android-NEXT: sub r5, #40192 ; Thumb-android-NEXT: ldr r4, .LCPI4_2 +; Thumb-android-NEXT: sub r5, r5, r4 +; Thumb-android-NEXT: ldr r4, .LCPI4_3 ; Thumb-android-NEXT: ldr r4, [r4] ; Thumb-android-NEXT: cmp r4, r5 ; Thumb-android-NEXT: blo .LBB4_2 -; Thumb-android: mov r4, #40192 +; Thumb-android: ldr r4, .LCPI4_2 ; Thumb-android-NEXT: mov r5, #0 ; Thumb-android-NEXT: push {lr} ; Thumb-android-NEXT: bl __morestack @@ -229,17 +235,21 @@ define fastcc void @test_fastcc_large() #0 { ; Thumb-android: pop {r4, r5} +; Thumb-android: .LCPI4_2: +; Thumb-android-NEXT: .long 40192 + ; Thumb-linux-LABEL: test_fastcc_large: ; Thumb-linux: push {r4, r5} ; Thumb-linux-NEXT: mov r5, sp -; Thumb-linux-NEXT: sub r5, #40192 ; Thumb-linux-NEXT: ldr r4, .LCPI4_2 +; Thumb-linux-NEXT: sub r5, r5, r4 +; Thumb-linux-NEXT: ldr r4, .LCPI4_3 ; Thumb-linux-NEXT: ldr r4, [r4] ; Thumb-linux-NEXT: cmp r4, r5 ; Thumb-linux-NEXT: blo .LBB4_2 -; Thumb-linux: mov r4, #40192 +; Thumb-linux: ldr r4, .LCPI4_2 ; Thumb-linux-NEXT: mov r5, #0 ; Thumb-linux-NEXT: push {lr} ; Thumb-linux-NEXT: bl __morestack @@ -250,6 +260,9 @@ define fastcc void @test_fastcc_large() #0 { ; Thumb-linux: pop {r4, r5} +; Thumb-linux: .LCPI4_2: +; Thumb-linux-NEXT: .long 40192 + } define void @test_nostack() #0 { diff --git a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll index ee4dd0186b3a..0f34e9c98e89 100644 --- 
a/llvm/test/CodeGen/Thumb2/segmented-stacks.ll +++ b/llvm/test/CodeGen/Thumb2/segmented-stacks.ll @@ -67,4 +67,108 @@ define void @test_basic() #0 { ret void } +define void @test_large() #0 { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; THUMB-LABEL: test_large: + +; THUMB: push {r4, r5} +; THUMB-NEXT: movw r4, #40192 +; THUMB-NEXT: mov r5, sp +; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: sub r5, r5, r4 +; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 +; THUMB-NEXT: ldr.w r4, [r4, #252] +; THUMB-NEXT: cmp r4, r5 +; THUMB-NEXT: blo .LBB1_2 + +; THUMB: movw r4, #40192 +; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: mov r5, #0 +; THUMB-NEXT: push {lr} +; THUMB-NEXT: bl __morestack +; THUMB-NEXT: ldr lr, [sp], #4 +; THUMB-NEXT: pop {r4, r5} +; THUMB-NEXT: bx lr + +; THUMB: pop {r4, r5} + + +; ARM-LABEL: test_large: + +; ARM: push {r4, r5} +; ARM-NEXT: ldr r4, .LCPI1_0 +; ARM-NEXT: sub r5, sp, r4 +; ARM-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-NEXT: ldr r4, [r4, #252] +; ARM-NEXT: cmp r4, r5 +; ARM-NEXT: blo .LBB1_2 + +; ARM: ldr r4, .LCPI1_0 +; ARM-NEXT: mov r5, #0 +; ARM-NEXT: stmdb sp!, {lr} +; ARM-NEXT: bl __morestack +; ARM-NEXT: ldm sp!, {lr} +; ARM-NEXT: pop {r4, r5} +; ARM-NEXT: bx lr + +; ARM: pop {r4, r5} + +; ARM: .LCPI1_0: +; ARM-NEXT: .long 40192 + +} + +define fastcc void @test_fastcc_large() #0 { + %mem = alloca i32, i32 10000 + call void @dummy_use (i32* %mem, i32 0) + ret void + +; THUMB-LABEL: test_fastcc_large: + +; THUMB: push {r4, r5} +; THUMB-NEXT: movw r4, #40192 +; THUMB-NEXT: mov r5, sp +; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: sub r5, r5, r4 +; THUMB-NEXT: mrc p15, #0, r4, c13, c0, #3 +; THUMB-NEXT: ldr.w r4, [r4, #252] +; THUMB-NEXT: cmp r4, r5 +; THUMB-NEXT: blo .LBB2_2 + +; THUMB: movw r4, #40192 +; THUMB-NEXT: movt r4, #0 +; THUMB-NEXT: mov r5, #0 +; THUMB-NEXT: push {lr} +; THUMB-NEXT: bl __morestack +; THUMB-NEXT: ldr lr, [sp], #4 +; THUMB-NEXT: pop {r4, r5} +; THUMB-NEXT: bx lr + +; THUMB: pop {r4, r5} + +; 
ARM-LABEL: test_fastcc_large: + +; ARM: push {r4, r5} +; ARM-NEXT: ldr r4, .LCPI2_0 +; ARM-NEXT: sub r5, sp, r4 +; ARM-NEXT: mrc p15, #0, r4, c13, c0, #3 +; ARM-NEXT: ldr r4, [r4, #252] +; ARM-NEXT: cmp r4, r5 +; ARM-NEXT: blo .LBB2_2 + +; ARM: ldr r4, .LCPI2_0 +; ARM-NEXT: mov r5, #0 +; ARM-NEXT: stmdb sp!, {lr} +; ARM-NEXT: bl __morestack +; ARM-NEXT: ldm sp!, {lr} +; ARM-NEXT: pop {r4, r5} +; ARM-NEXT: bx lr + +; ARM: .LCPI2_0: +; ARM-NEXT: .long 40192 +} + attributes #0 = { "split-stack" } -- cgit v1.2.3