Ticket #8033: ghc-x86-64+avx.diff

File ghc-x86-64+avx.diff, 4.3 KB (added by carter, 9 months ago)
  • lib/Target/X86/X86CallingConv.td

    diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
    index 9eafbd5..58874e4 100644
    --- a/lib/Target/X86/X86CallingConv.td
    +++ b/lib/Target/X86/X86CallingConv.td
    @@ -284,10 +284,17 @@ def CC_X86_64_GHC : CallingConv<[
       CCIfType<[i64],
                CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
     
    -  // Pass in STG registers: F1, F2, F3, F4, D1, D2
    +  // Pass in STG registers for floats, doubles and 128-bit SIMD vectors
       CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
                CCIfSubtarget<"hasSSE1()",
    -            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
    +            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>,
    +
    +  // Pass in STG registers for 256-bit SIMD vectors
    +  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
    +           CCIfSubtarget<"hasAVX()",
    +                         CCAssignToReg<[YMM1, YMM2, YMM3,
    +                                        YMM4, YMM5, YMM6]>>>
     ]>;
     
     def CC_X86_64_HiPE : CallingConv<[
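
    For reference, a minimal IR sketch (hypothetical, not part of the patch) of
    what the new rule enables: under the GHC convention (cc 10) on a subtarget
    where hasAVX() holds, a 256-bit vector argument is assigned to the first
    free register in the new YMM list, so the <4 x double> below should land in
    YMM1.

        ; Hypothetical example, not part of the patch.
        ; With -mcpu=core-avx2, %v should be passed in YMM1, the first entry
        ; in the new CCAssignToReg list for 256-bit vectors.
        declare cc 10 void @vecfun(<4 x double>)

        define void @caller(<4 x double> %v) nounwind {
        entry:
          tail call cc 10 void @vecfun(<4 x double> %v) nounwind
          ret void
        }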
  • new file test/CodeGen/X86/ghc-cc64-avx.ll

    diff --git a/test/CodeGen/X86/ghc-cc64-avx.ll b/test/CodeGen/X86/ghc-cc64-avx.ll
    new file mode 100644
    index 0000000..9bd15bf
    --- /dev/null
    +++ b/test/CodeGen/X86/ghc-cc64-avx.ll
    @@ -0,0 +1,84 @@
    +; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu -mcpu=core-avx2 | FileCheck %s
    +
    +; Check that the GHC calling convention works (x86-64)
    +
    +@base  = external global i64 ; assigned to register: R13
    +@sp    = external global i64 ; assigned to register: RBP
    +@hp    = external global i64 ; assigned to register: R12
    +@r1    = external global i64 ; assigned to register: RBX
    +@r2    = external global i64 ; assigned to register: R14
    +@r3    = external global i64 ; assigned to register: RSI
    +@r4    = external global i64 ; assigned to register: RDI
    +@r5    = external global i64 ; assigned to register: R8
    +@r6    = external global i64 ; assigned to register: R9
    +@splim = external global i64 ; assigned to register: R15
    +
    +@v1 = external global <4 x double> ; assigned to register: YMM1
    +@v2 = external global <4 x double> ; assigned to register: YMM2
    +@v3 = external global <2 x double> ; assigned to register: XMM3
    +@v4 = external global <2 x double> ; assigned to register: XMM4
    +@v5 = external global <4 x double> ; assigned to register: YMM5
    +@v6 = external global <4 x double> ; assigned to register: YMM6
    +
    +define void @zap(i64 %a, i64 %b) nounwind {
    +entry:
    +  ; CHECK:      movq %rdi, %r13
    +  ; CHECK-NEXT: movq %rsi, %rbp
    +  ; CHECK-NEXT: callq addtwo
    +  %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
    +  ; CHECK:      callq foo
    +  call void @foo() nounwind
    +  ret void
    +}
    +
    +define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
    +entry:
    +  ; CHECK:      leaq (%r13,%rbp), %rax
    +  %0 = add i64 %x, %y
    +  ; CHECK-NEXT: ret
    +  ret i64 %0
    +}
    +
    +define cc 10 void @foo() nounwind {
    +entry:
    +  ; CHECK:      movq base(%rip), %r13
    +  ; CHECK-NEXT: movq sp(%rip), %rbp
    +  ; CHECK-NEXT: movq hp(%rip), %r12
    +  ; CHECK-NEXT: movq r1(%rip), %rbx
    +  ; CHECK-NEXT: movq r2(%rip), %r14
    +  ; CHECK-NEXT: movq r3(%rip), %rsi
    +  ; CHECK-NEXT: movq r4(%rip), %rdi
    +  ; CHECK-NEXT: movq r5(%rip), %r8
    +  ; CHECK-NEXT: movq r6(%rip), %r9
    +  ; CHECK-NEXT: movq splim(%rip), %r15
    +  ; CHECK-NEXT: vmovaps v6(%rip), %ymm6
    +  ; CHECK-NEXT: vmovaps v5(%rip), %ymm5
    +  ; CHECK-NEXT: vmovaps v4(%rip), %xmm4
    +  ; CHECK-NEXT: vmovaps v3(%rip), %xmm3
    +  ; CHECK-NEXT: vmovaps v2(%rip), %ymm2
    +  ; CHECK-NEXT: vmovaps v1(%rip), %ymm1
    +  %0 = load <4 x double>* @v1
    +  %1 = load <4 x double>* @v2
    +  %2 = load <2 x double>* @v3
    +  %3 = load <2 x double>* @v4
    +  %4 = load <4 x double>* @v5
    +  %5 = load <4 x double>* @v6
    +  %6 = load i64* @splim
    +  %7 = load i64* @r6
    +  %8 = load i64* @r5
    +  %9 = load i64* @r4
    +  %10 = load i64* @r3
    +  %11 = load i64* @r2
    +  %12 = load i64* @r1
    +  %13 = load i64* @hp
    +  %14 = load i64* @sp
    +  %15 = load i64* @base
    +  ; CHECK: jmp bar
    +  tail call cc 10 void @bar(i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
    +                            i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
    +                            <4 x double> %0, <4 x double> %1, <2 x double> %2, <2 x double> %3, <4 x double> %4, <4 x double> %5) nounwind
    +  ret void
    +}
    +
    +declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
    +                        <4 x double>, <4 x double>, <2 x double>, <2 x double>, <4 x double>, <4 x double>)
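
    A note on why the test mixes 128-bit and 256-bit vectors: XMMn is the low
    half of YMMn, so the two CCAssignToReg lists share allocation state. Once
    YMM1 and YMM2 are taken by v1 and v2, the first <2 x double> argument must
    land in XMM3 rather than XMM1, and the next <4 x double> skips ahead to
    YMM5, which is exactly what the vmovaps CHECK lines above verify. A minimal
    sketch of the same aliasing effect (hypothetical, not part of the patch):

        declare cc 10 void @mixed(<4 x double>, <2 x double>)

        define void @alias_check(<4 x double> %a, <2 x double> %b) nounwind {
        entry:
          ; %a takes YMM1; since XMM1 aliases YMM1, %b should go to XMM2.
          tail call cc 10 void @mixed(<4 x double> %a, <2 x double> %b) nounwind
          ret void
        }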