; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 -mattr=+d --verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefix=LA64

; External function (declared here without a body). The callers below pass it
; the address of their over-aligned stack slot.
declare void @callee(ptr)

; Stack realignment for a 32-byte-aligned local: a one-byte alloca with
; `align 32` is passed by address to @callee.
; Expected code on both targets: spill $ra and $fp, set $fp to the incoming
; $sp, then clear bits 4..0 of $sp (bstrins from $zero) so $sp is a multiple
; of 32. The epilogue recomputes $sp from $fp before reloading.
; Frame size / argument offset: 32 bytes and $sp + 0 on the 32-bit target,
; 64 bytes and $sp + 32 on the 64-bit target.
define void @caller32() {
; LA32-LABEL: caller32:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -32
; LA32-NEXT:    .cfi_def_cfa_offset 32
; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 32
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    bstrins.w $sp, $zero, 4, 0
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -32
; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ret
;
; LA64-LABEL: caller32:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -64
; LA64-NEXT:    .cfi_def_cfa_offset 64
; LA64-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 64
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    bstrins.d $sp, $zero, 4, 0
; LA64-NEXT:    addi.d $a0, $sp, 32
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -64
; LA64-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 64
; LA64-NEXT:    ret
  %1 = alloca i8, align 32
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller32 (one-byte alloca, `align 32`, address passed to
; @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign32() "no-realign-stack" {
; LA32-LABEL: caller_no_realign32:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign32:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 32
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 64-byte-aligned local: a one-byte alloca with
; `align 64` is passed by address to @callee.
; Expected code on both targets: spill $ra and $fp, set $fp to the incoming
; $sp, then clear bits 5..0 of $sp (bstrins from $zero) so $sp is a multiple
; of 64. The epilogue recomputes $sp from $fp before reloading.
; Frame size / argument offset: 64 bytes and $sp + 0 on the 32-bit target,
; 128 bytes and $sp + 64 on the 64-bit target.
define void @caller64() {
; LA32-LABEL: caller64:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -64
; LA32-NEXT:    .cfi_def_cfa_offset 64
; LA32-NEXT:    st.w $ra, $sp, 60 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 56 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 64
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    bstrins.w $sp, $zero, 5, 0
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -64
; LA32-NEXT:    ld.w $fp, $sp, 56 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 60 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 64
; LA32-NEXT:    ret
;
; LA64-LABEL: caller64:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -128
; LA64-NEXT:    .cfi_def_cfa_offset 128
; LA64-NEXT:    st.d $ra, $sp, 120 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 112 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 128
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    bstrins.d $sp, $zero, 5, 0
; LA64-NEXT:    addi.d $a0, $sp, 64
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -128
; LA64-NEXT:    ld.d $fp, $sp, 112 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 120 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 128
; LA64-NEXT:    ret
  %1 = alloca i8, align 64
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller64 (one-byte alloca, `align 64`, address passed to
; @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign64() "no-realign-stack" {
; LA32-LABEL: caller_no_realign64:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign64:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 64
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 128-byte-aligned local: a one-byte alloca with
; `align 128` is passed by address to @callee.
; Expected code on both targets: spill $ra and $fp, set $fp to the incoming
; $sp, then clear bits 6..0 of $sp (bstrins from $zero) so $sp is a multiple
; of 128. The epilogue recomputes $sp from $fp before reloading.
; Frame size / argument offset: 128 bytes and $sp + 0 on the 32-bit target,
; 256 bytes and $sp + 128 on the 64-bit target.
define void @caller128() {
; LA32-LABEL: caller128:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -128
; LA32-NEXT:    .cfi_def_cfa_offset 128
; LA32-NEXT:    st.w $ra, $sp, 124 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 120 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 128
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    bstrins.w $sp, $zero, 6, 0
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -128
; LA32-NEXT:    ld.w $fp, $sp, 120 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 124 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 128
; LA32-NEXT:    ret
;
; LA64-LABEL: caller128:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -256
; LA64-NEXT:    .cfi_def_cfa_offset 256
; LA64-NEXT:    st.d $ra, $sp, 248 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 240 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 256
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    bstrins.d $sp, $zero, 6, 0
; LA64-NEXT:    addi.d $a0, $sp, 128
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -256
; LA64-NEXT:    ld.d $fp, $sp, 240 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 248 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 256
; LA64-NEXT:    ret
  %1 = alloca i8, align 128
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller128 (one-byte alloca, `align 128`, address passed to
; @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign128() "no-realign-stack" {
; LA32-LABEL: caller_no_realign128:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign128:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 128
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 256-byte-aligned local: a one-byte alloca with
; `align 256` is passed by address to @callee.
; Expected code on both targets: spill $ra and $fp, set $fp to the incoming
; $sp, then clear bits 7..0 of $sp (bstrins from $zero) so $sp is a multiple
; of 256. The epilogue recomputes $sp from $fp before reloading.
; Frame size / argument offset: 256 bytes and $sp + 0 on the 32-bit target,
; 512 bytes and $sp + 256 on the 64-bit target.
define void @caller256() {
; LA32-LABEL: caller256:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -256
; LA32-NEXT:    .cfi_def_cfa_offset 256
; LA32-NEXT:    st.w $ra, $sp, 252 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 248 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 256
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    bstrins.w $sp, $zero, 7, 0
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -256
; LA32-NEXT:    ld.w $fp, $sp, 248 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 252 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 256
; LA32-NEXT:    ret
;
; LA64-LABEL: caller256:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -512
; LA64-NEXT:    .cfi_def_cfa_offset 512
; LA64-NEXT:    st.d $ra, $sp, 504 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 496 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 512
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    bstrins.d $sp, $zero, 7, 0
; LA64-NEXT:    addi.d $a0, $sp, 256
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -512
; LA64-NEXT:    ld.d $fp, $sp, 496 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 504 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 512
; LA64-NEXT:    ret
  %1 = alloca i8, align 256
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller256 (one-byte alloca, `align 256`, address passed to
; @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign256() "no-realign-stack" {
; LA32-LABEL: caller_no_realign256:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign256:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 256
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 512-byte-aligned local: a one-byte alloca with
; `align 512` is passed by address to @callee.
; Expected code on both targets: spill $ra and $fp, set $fp to the incoming
; $sp, then clear bits 8..0 of $sp (bstrins from $zero) so $sp is a multiple
; of 512. The epilogue recomputes $sp from $fp before reloading.
; Here both targets use the same 1024-byte frame and pass $sp + 512.
define void @caller512() {
; LA32-LABEL: caller512:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -1024
; LA32-NEXT:    .cfi_def_cfa_offset 1024
; LA32-NEXT:    st.w $ra, $sp, 1020 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 1016 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 1024
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    bstrins.w $sp, $zero, 8, 0
; LA32-NEXT:    addi.w $a0, $sp, 512
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -1024
; LA32-NEXT:    ld.w $fp, $sp, 1016 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 1020 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 1024
; LA32-NEXT:    ret
;
; LA64-LABEL: caller512:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -1024
; LA64-NEXT:    .cfi_def_cfa_offset 1024
; LA64-NEXT:    st.d $ra, $sp, 1016 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 1008 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 1024
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    bstrins.d $sp, $zero, 8, 0
; LA64-NEXT:    addi.d $a0, $sp, 512
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -1024
; LA64-NEXT:    ld.d $fp, $sp, 1008 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 1016 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 1024
; LA64-NEXT:    ret
  %1 = alloca i8, align 512
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller512 (one-byte alloca, `align 512`, address passed to
; @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign512() "no-realign-stack" {
; LA32-LABEL: caller_no_realign512:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign512:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 512
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 1024-byte-aligned local: a one-byte alloca with
; `align 1024` is passed by address to @callee.
; Expected code on both targets: the frame is allocated in two steps. First
; 2032 bytes (within which $ra and $fp are spilled and $fp is set to the
; incoming $sp), then a further 16 bytes, for 2048 in total. Bits 9..0 of $sp
; are then cleared (bstrins from $zero) and the argument is $sp + 1024.
; The epilogue mirrors this: $sp = $fp - 2048, then + 16, reload, then + 2032.
; NOTE(review): the 2032/16 split is presumably driven by the 12-bit signed
; immediate range of addi and the spill offsets; confirm against the backend.
define void @caller1024() {
; LA32-LABEL: caller1024:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -2032
; LA32-NEXT:    .cfi_def_cfa_offset 2032
; LA32-NEXT:    st.w $ra, $sp, 2028 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 2024 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 2032
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    bstrins.w $sp, $zero, 9, 0
; LA32-NEXT:    addi.w $a0, $sp, 1024
; LA32-NEXT:    bl callee
; LA32-NEXT:    addi.w $sp, $fp, -2048
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ld.w $fp, $sp, 2024 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 2028 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 2032
; LA32-NEXT:    ret
;
; LA64-LABEL: caller1024:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -2032
; LA64-NEXT:    .cfi_def_cfa_offset 2032
; LA64-NEXT:    st.d $ra, $sp, 2024 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 2016 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 2032
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    bstrins.d $sp, $zero, 9, 0
; LA64-NEXT:    addi.d $a0, $sp, 1024
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    addi.d $sp, $fp, -2048
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ld.d $fp, $sp, 2016 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 2024 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 2032
; LA64-NEXT:    ret
  %1 = alloca i8, align 1024
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller1024 (one-byte alloca, `align 1024`, address passed
; to @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign1024() "no-realign-stack" {
; LA32-LABEL: caller_no_realign1024:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign1024:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 1024
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 2048-byte-aligned local: a one-byte alloca with
; `align 2048` is passed by address to @callee.
; Expected code on both targets: the frame is allocated in three steps
; (2032 + 2048 + 16 = 4096 bytes); $ra and $fp are spilled and $fp is set to
; the incoming $sp after the first step. Bits 10..0 of $sp are then cleared
; (bstrins from $zero). The argument offset 2048 is materialised with ori and
; added to $sp with a register add rather than an addi immediate.
; The epilogue sets $sp = $fp - 4096 (lu12i.w 1), then adds back 2032 + 32
; before reloading and releasing the first 2032 bytes.
define void @caller2048() {
; LA32-LABEL: caller2048:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -2032
; LA32-NEXT:    .cfi_def_cfa_offset 2032
; LA32-NEXT:    st.w $ra, $sp, 2028 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 2024 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 2032
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    addi.w $sp, $sp, -2048
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    bstrins.w $sp, $zero, 10, 0
; LA32-NEXT:    ori $a0, $zero, 2048
; LA32-NEXT:    add.w $a0, $sp, $a0
; LA32-NEXT:    bl callee
; LA32-NEXT:    lu12i.w $a0, 1
; LA32-NEXT:    sub.w $sp, $fp, $a0
; LA32-NEXT:    addi.w $sp, $sp, 2032
; LA32-NEXT:    addi.w $sp, $sp, 32
; LA32-NEXT:    ld.w $fp, $sp, 2024 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 2028 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 2032
; LA32-NEXT:    ret
;
; LA64-LABEL: caller2048:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -2032
; LA64-NEXT:    .cfi_def_cfa_offset 2032
; LA64-NEXT:    st.d $ra, $sp, 2024 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 2016 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 2032
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    addi.d $sp, $sp, -2048
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    bstrins.d $sp, $zero, 10, 0
; LA64-NEXT:    ori $a0, $zero, 2048
; LA64-NEXT:    add.d $a0, $sp, $a0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    lu12i.w $a0, 1
; LA64-NEXT:    sub.d $sp, $fp, $a0
; LA64-NEXT:    addi.d $sp, $sp, 2032
; LA64-NEXT:    addi.d $sp, $sp, 32
; LA64-NEXT:    ld.d $fp, $sp, 2016 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 2024 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 2032
; LA64-NEXT:    ret
  %1 = alloca i8, align 2048
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller2048 (one-byte alloca, `align 2048`, address passed
; to @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign2048() "no-realign-stack" {
; LA32-LABEL: caller_no_realign2048:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign2048:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 2048
  call void @callee(ptr %1)
  ret void
}

; Stack realignment for a 4096-byte-aligned local: a one-byte alloca with
; `align 4096` is passed by address to @callee.
; Expected code on both targets: after the initial 2032-byte step (spill $ra
; and $fp, set $fp to the incoming $sp), the remaining 6160 bytes are
; subtracted via a constant built in $a0 (lu12i.w 1 = 4096, ori 2064), for
; 8192 in total. Bits 11..0 of $sp are then cleared (bstrins from $zero) and
; the argument is $sp + 4096, again formed through a register add.
; The epilogue sets $sp = $fp - 8192 (lu12i.w 2), adds back 6160, reloads,
; then releases the first 2032 bytes.
define void @caller4096() {
; LA32-LABEL: caller4096:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -2032
; LA32-NEXT:    .cfi_def_cfa_offset 2032
; LA32-NEXT:    st.w $ra, $sp, 2028 # 4-byte Folded Spill
; LA32-NEXT:    st.w $fp, $sp, 2024 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    .cfi_offset 22, -8
; LA32-NEXT:    addi.w $fp, $sp, 2032
; LA32-NEXT:    .cfi_def_cfa 22, 0
; LA32-NEXT:    lu12i.w $a0, 1
; LA32-NEXT:    ori $a0, $a0, 2064
; LA32-NEXT:    sub.w $sp, $sp, $a0
; LA32-NEXT:    bstrins.w $sp, $zero, 11, 0
; LA32-NEXT:    lu12i.w $a0, 1
; LA32-NEXT:    add.w $a0, $sp, $a0
; LA32-NEXT:    bl callee
; LA32-NEXT:    lu12i.w $a0, 2
; LA32-NEXT:    sub.w $sp, $fp, $a0
; LA32-NEXT:    lu12i.w $a0, 1
; LA32-NEXT:    ori $a0, $a0, 2064
; LA32-NEXT:    add.w $sp, $sp, $a0
; LA32-NEXT:    ld.w $fp, $sp, 2024 # 4-byte Folded Reload
; LA32-NEXT:    ld.w $ra, $sp, 2028 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 2032
; LA32-NEXT:    ret
;
; LA64-LABEL: caller4096:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -2032
; LA64-NEXT:    .cfi_def_cfa_offset 2032
; LA64-NEXT:    st.d $ra, $sp, 2024 # 8-byte Folded Spill
; LA64-NEXT:    st.d $fp, $sp, 2016 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    .cfi_offset 22, -16
; LA64-NEXT:    addi.d $fp, $sp, 2032
; LA64-NEXT:    .cfi_def_cfa 22, 0
; LA64-NEXT:    lu12i.w $a0, 1
; LA64-NEXT:    ori $a0, $a0, 2064
; LA64-NEXT:    sub.d $sp, $sp, $a0
; LA64-NEXT:    bstrins.d $sp, $zero, 11, 0
; LA64-NEXT:    lu12i.w $a0, 1
; LA64-NEXT:    add.d $a0, $sp, $a0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    lu12i.w $a0, 2
; LA64-NEXT:    sub.d $sp, $fp, $a0
; LA64-NEXT:    lu12i.w $a0, 1
; LA64-NEXT:    ori $a0, $a0, 2064
; LA64-NEXT:    add.d $sp, $sp, $a0
; LA64-NEXT:    ld.d $fp, $sp, 2016 # 8-byte Folded Reload
; LA64-NEXT:    ld.d $ra, $sp, 2024 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 2032
; LA64-NEXT:    ret
  %1 = alloca i8, align 4096
  call void @callee(ptr %1)
  ret void
}

; Same IR body as @caller4096 (one-byte alloca, `align 4096`, address passed
; to @callee) but with the "no-realign-stack" function attribute.
; Expected code on both targets: a plain 16-byte frame that spills only $ra,
; with no $fp setup and no bstrins on $sp; the argument is $sp + 0.
define void @caller_no_realign4096() "no-realign-stack" {
; LA32-LABEL: caller_no_realign4096:
; LA32:       # %bb.0:
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    addi.w $a0, $sp, 0
; LA32-NEXT:    bl callee
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: caller_no_realign4096:
; LA64:       # %bb.0:
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    addi.d $a0, $sp, 0
; LA64-NEXT:    pcaddu18i $ra, %call36(callee)
; LA64-NEXT:    jirl $ra, $ra, 0
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
  %1 = alloca i8, align 4096
  call void @callee(ptr %1)
  ret void
}
