Ticket #8033: ghc-x86-64-avx-volatile (1).patch

File ghc-x86-64-avx-volatile (1).patch, 4.4 KB (added by carter, 2 years ago)
  • lib/Target/X86/X86CallingConv.td

    @@ -284,10 +284,17 @@
       CCIfType<[i64],
                 CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,

    -  // Pass in STG registers: F1, F2, F3, F4, D1, D2
    +  // Pass in STG registers for floats, doubles, and 128-bit SIMD vectors
       CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                 CCIfSubtarget<"hasSSE1()",
    -            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
    +            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>,
    +
    +  // Pass in STG registers for 256-bit SIMD vectors
    +  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
    +            CCIfSubtarget<"hasAVX()",
    +            CCAssignToReg<[YMM1, YMM2, YMM3,
    +                           YMM4, YMM5, YMM6]>>>
    +
     ]>;

     def CC_X86_64_HiPE : CallingConv<[
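
  For context (not part of the patch): with the new rule, CC_X86_64_GHC
  assigns a 256-bit vector argument of a GHC-convention (cc 10) function to
  one of YMM1-YMM6 when the subtarget has AVX. A minimal illustrative sketch
  in LLVM IR of the era; the function names @passv and @consume are
  hypothetical, not taken from the patch:

      define cc 10 void @passv(<4 x double> %v) nounwind {
      entry:
        ; %v arrives in YMM1, and is forwarded in YMM1, under the new rule
        tail call cc 10 void @consume(<4 x double> %v) nounwind
        ret void
      }
      declare cc 10 void @consume(<4 x double>)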
  • test/CodeGen/X86/ghc-cc64-avx.ll (new file)

    ; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu -mcpu=core-avx2 | FileCheck %s

    ; Check the GHC call convention works (x86-64)

    @base  = external global i64 ; assigned to register: R13
    @sp    = external global i64 ; assigned to register: RBP
    @hp    = external global i64 ; assigned to register: R12
    @r1    = external global i64 ; assigned to register: RBX
    @r2    = external global i64 ; assigned to register: R14
    @r3    = external global i64 ; assigned to register: RSI
    @r4    = external global i64 ; assigned to register: RDI
    @r5    = external global i64 ; assigned to register: R8
    @r6    = external global i64 ; assigned to register: R9
    @splim = external global i64 ; assigned to register: R15

    @v1 = external global <4 x double> ; assigned to register: YMM1
    @v2 = external global <4 x double> ; assigned to register: YMM2
    @v3 = external global <2 x double> ; assigned to register: XMM3
    @v4 = external global <2 x double> ; assigned to register: XMM4
    @v5 = external global <4 x double> ; assigned to register: YMM5
    @v6 = external global <4 x double> ; assigned to register: YMM6

    define void @zap(i64 %a, i64 %b) nounwind {
    entry:
      ; CHECK:      movq %rdi, %r13
      ; CHECK-NEXT: movq %rsi, %rbp
      ; CHECK-NEXT: callq addtwo
      %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
      ; CHECK:      callq foo
      call void @foo() nounwind
      ret void
    }

    define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
    entry:
      ; CHECK:      leaq (%r13,%rbp), %rax
      %0 = add i64 %x, %y
      ; CHECK-NEXT: ret
      ret i64 %0
    }

    define cc 10 void @foo() nounwind {
    entry:
      ; CHECK: movq base(%rip), %r13
      ; CHECK: movq sp(%rip), %rbp
      ; CHECK: movq hp(%rip), %r12
      ; CHECK: movq r1(%rip), %rbx
      ; CHECK: movq r2(%rip), %r14
      ; CHECK: movq r3(%rip), %rsi
      ; CHECK: movq r4(%rip), %rdi
      ; CHECK: movq r5(%rip), %r8
      ; CHECK: movq r6(%rip), %r9
      ; CHECK: movq splim(%rip), %r15
      ; CHECK: vmovaps v1(%rip), %ymm1
      ; CHECK: vmovaps v2(%rip), %ymm2
      ; CHECK: vmovaps v3(%rip), %xmm3
      ; CHECK: vmovaps v4(%rip), %xmm4
      ; CHECK: vmovaps v5(%rip), %ymm5
      ; CHECK: vmovaps v6(%rip), %ymm6
      %0 = load volatile i64* @base
      %1 = load volatile i64* @sp
      %2 = load volatile i64* @hp
      %3 = load volatile i64* @r1
      %4 = load volatile i64* @r2
      %5 = load volatile i64* @r3
      %6 = load volatile i64* @r4
      %7 = load volatile i64* @r5
      %8 = load volatile i64* @r6
      %9 = load volatile i64* @splim
      %10 = load volatile <4 x double>* @v1
      %11 = load volatile <4 x double>* @v2
      %12 = load volatile <2 x double>* @v3
      %13 = load volatile <2 x double>* @v4
      %14 = load volatile <4 x double>* @v5
      %15 = load volatile <4 x double>* @v6
      ; CHECK: jmp bar
      tail call cc 10 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4,
                                i64 %5, i64 %6, i64 %7, i64 %8, i64 %9,
                                <4 x double> %10, <4 x double> %11,
                                <2 x double> %12, <2 x double> %13,
                                <4 x double> %14, <4 x double> %15) nounwind
      ret void
    }

    declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
                            <4 x double>, <4 x double>, <2 x double>, <2 x double>,
                            <4 x double>, <4 x double>)
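
  To run the new test by hand (outside the lit harness), the RUN line expands
  to the following invocation, assuming llc and FileCheck from a tree with
  this patch applied are on PATH:

      llc < test/CodeGen/X86/ghc-cc64-avx.ll -tailcallopt \
          -mtriple=x86_64-linux-gnu -mcpu=core-avx2 \
        | FileCheck test/CodeGen/X86/ghc-cc64-avx.ll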