Changes between Version 4 and Version 5 of SIMDVectorExampleInLLVM


Ignore:
Timestamp:
Oct 25, 2011 6:52:41 PM (4 years ago)
Author:
pmonday
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • SIMDVectorExampleInLLVM

    v4 v5  
    117117Here is the .ll code rewritten with vectorization:
    118118{{{
     119; ModuleID = '/tmp/webcompile/_21191_0.bc'
     120target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
     121target triple = "x86_64-unknown-linux-gnu"
    119122
     123@.str = private unnamed_addr constant [13 x i8] c"%f %f %f %f\0A\00" ; printf format: four %f fields, newline (\0A), NUL terminator (\00) = 13 bytes
    120124
     125define i32 @main() nounwind {  ; builds x and y, adds them with one vector fadd, prints the four results, returns 0
     126  %1 = alloca i32, align 4  ; slot for the return value (set to 0 below, reloaded before ret)
     127
     128  ; allocate three <4 x float> slots (x, y, z), each 16-byte aligned
     129  %x = alloca <4 x float>, align 16
     130  %y = alloca <4 x float>, align 16
     131  %z = alloca <4 x float>, align 16
     132
     133  store i32 0, i32* %1  ; return value = 0
     134
     135  ; initialise x = (1, 2, 3, 4) and y = (10, 20, 30, 40), one float element at a time
     136  %2 = getelementptr inbounds <4 x float>* %x, i32 0, i64 0  ; address of element 0 of x
     137  store float 1.000000e+00, float* %2
     138  %3 = getelementptr inbounds <4 x float>* %x, i32 0, i64 1  ; address of element 1 of x
     139  store float 2.000000e+00, float* %3
     140  %4 = getelementptr inbounds <4 x float>* %x, i32 0, i64 2  ; address of element 2 of x
     141  store float 3.000000e+00, float* %4
     142  %5 = getelementptr inbounds <4 x float>* %x, i32 0, i64 3  ; address of element 3 of x
     143  store float 4.000000e+00, float* %5
     144  %6 = getelementptr inbounds <4 x float>* %y, i32 0, i64 0  ; address of element 0 of y
     145  store float 1.000000e+01, float* %6
     146  %7 = getelementptr inbounds <4 x float>* %y, i32 0, i64 1  ; address of element 1 of y
     147  store float 2.000000e+01, float* %7
     148  %8 = getelementptr inbounds <4 x float>* %y, i32 0, i64 2  ; address of element 2 of y
     149  store float 3.000000e+01, float* %8
     150  %9 = getelementptr inbounds <4 x float>* %y, i32 0, i64 3  ; address of element 3 of y
     151  store float 4.000000e+01, float* %9
     152
     153  ; load x and y as whole <4 x float> values
     154  %xs = load <4 x float>* %x
     155  %ys = load <4 x float>* %y
     156
     157  ; element-wise add: a single fadd on <4 x float> operands
     158  %zs = fadd <4 x float> %xs, %ys
     159
     160  ; store the <4 x float> sum into z
     161  store <4 x float> %zs, <4 x float>* %z
     162
     163  ; read each element of z back from memory and widen it from float to double for the printf call
     164  %10 = getelementptr inbounds <4 x float>* %z, i32 0, i64 0  ; address of element 0 of z
     165  %11 = load float* %10
     166  %12 = fpext float %11 to double
     167  %13 = getelementptr inbounds <4 x float>* %z, i32 0, i64 1  ; address of element 1 of z
     168  %14 = load float* %13
     169  %15 = fpext float %14 to double
     170  %16 = getelementptr inbounds <4 x float>* %z, i32 0, i64 2  ; address of element 2 of z
     171  %17 = load float* %16
     172  %18 = fpext float %17 to double
     173  %19 = getelementptr inbounds <4 x float>* %z, i32 0, i64 3  ; address of element 3 of z
     174  %20 = load float* %19
     175  %21 = fpext float %20 to double
     176
     177  ; print the four widened elements of z using the @.str format string; printf's i32 result (%22) is not used
     178  %22 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double %12, double %15, double %18, double %21)
     179
     180  ; return the value held in %1 (the 0 stored at the top of the function)
     181  %23 = load i32* %1
     182  ret i32 %23
     183}
     184
     185declare i32 @printf(i8*, ...) ; external variadic function called from @main; declared only, no body in this listing
    121186}}}
    122187