Ticket #8256: patch-prefetch-squash1.diff

File patch-prefetch-squash1.diff, 15.1 KB (added by carter, 7 months ago)

updated diff / patch

  • compiler/codeGen/StgCmmPrim.hs

    From 7c4f5b6967f795ec1e0d6036d277fc3b4f6a095f Mon Sep 17 00:00:00 2001
    From: Carter Tazio Schonwald <carter.schonwald@gmail.com>
    Date: Sun, 8 Sep 2013 21:42:36 -0400
    Subject: [PATCH] removing redundant prefetch case
    
    ---
     compiler/codeGen/StgCmmPrim.hs | 9 ---------
     1 file changed, 9 deletions(-)
    
    diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
    index bb58024..e63378a 100644
    a b emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg] 
    255255emitPrimOp _ res@[] TouchOp args@[_arg] 
    256256   = do emitPrimCall res MO_Touch args 
    257257 
    258 emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg] 
    259    = do emitPrimCall res MO_Prefetch_Data args 
    260  
    261 emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg] 
    262    = do emitPrimCall res MO_Prefetch_Data args 
    263  
    264 emitPrimOp _ res@[] PrefetchAddrOp args@[_arg] 
    265    = do emitPrimCall res MO_Prefetch_Data args 
    266  
    267258--  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) 
    268259emitPrimOp dflags [res] ByteArrayContents_Char [arg] 
    269260   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags)) 
  • compiler/cmm/CmmMachOp.hs

    -- 
    1.8.2
    
    From 1b07948b50b64dc40413fc16b396d8f98bcc6188 Mon Sep 17 00:00:00 2001
    From: Carter Tazio Schonwald <carter.schonwald@gmail.com>
    Date: Sun, 8 Sep 2013 21:58:25 -0400
    Subject: [PATCH] some other fixups for adding prefetch i overlooked.
    
    note:should also change the native code gens to support prefetch. should be easy!
    (not doing it for now, but should do it this week) (+1 squashed commit)
    Squashed commits:
    [2a433e2] a first pass at all the machinery for supporting all the different read prefetches, need to test if it builds :)
    ---
     compiler/cmm/CmmMachOp.hs               |  8 +++-
     compiler/cmm/CmmParse.y                 |  9 ++++-
     compiler/cmm/PprC.hs                    |  4 +-
     compiler/codeGen/StgCmmPrim.hs          | 41 ++++++++++++++-------
     compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 12 ++++--
     compiler/nativeGen/PPC/CodeGen.hs       |  2 +-
     compiler/nativeGen/SPARC/CodeGen.hs     |  2 +-
     compiler/nativeGen/X86/CodeGen.hs       |  6 ++-
     compiler/prelude/primops.txt.pp         | 65 +++++++++++++++++++++++++++++++--
     9 files changed, 120 insertions(+), 29 deletions(-)
    
    diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
    index 8d42bbd..a398336 100644
    a b data CallishMachOp 
    518518  | MO_Touch         -- Keep variables live (when using interior pointers) 
    519519 
    520520  -- Prefetch 
    521   | MO_Prefetch_Data -- Prefetch hint. May change program performance but not 
     521  | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not 
    522522                     -- program behavior. 
     523                     -- the Int can be 0-3. Needs to be known at compile time 
     524                     -- to interact with code generation correctly. 
     525                     --  TODO: add support for prefetch WRITES, 
     526                     --  currently only exposes prefetch reads, which  
     527                     -- would be the majority of use cases in ghc anyways 
     528 
    523529 
    524530  -- Note that these three MachOps all take 1 extra parameter than the 
    525531  -- standard C lib versions. The extra (last) parameter contains 
  • compiler/cmm/CmmParse.y

    diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
    index 8c36dea..830cb92 100644
    a b callishMachOps = listToUFM $ 
    923923        ( "write_barrier", MO_WriteBarrier ), 
    924924        ( "memcpy", MO_Memcpy ), 
    925925        ( "memset", MO_Memset ), 
    926         ( "memmove", MO_Memmove ) 
     926        ( "memmove", MO_Memmove ), 
     927 
     928        ("prefetch0",MO_Prefetch_Data 0), 
     929        ("prefetch1",MO_Prefetch_Data 1), 
     930        ("prefetch2",MO_Prefetch_Data 2), 
     931        ("prefetch3",MO_Prefetch_Data 3) 
     932 
    927933        -- ToDo: the rest, maybe 
     934        -- edit; which rest? 
    928935    ] 
    929936 
    930937parseSafety :: String -> P Safety 
  • compiler/cmm/PprC.hs

    diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
    index 149968d..8a446d1 100644
    a b pprCallishMachOp_for_C mop 
    748748        MO_Add2       {} -> unsupported 
    749749        MO_U_Mul2     {} -> unsupported 
    750750        MO_Touch         -> unsupported 
    751         MO_Prefetch_Data -> unsupported 
     751        (MO_Prefetch_Data _ ) -> unsupported 
     752        --- we COULD support prefetch via "__builtin_prefetch" 
     753        --- Not adding it for now 
    752754    where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop 
    753755                            ++ " not supported!") 
    754756 
  • compiler/codeGen/StgCmmPrim.hs

    diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
    index e63378a..cf53ed4 100644
    a b emitPrimOp _ [res] Int64X2InsertOp [v,e,i] = 
    615615    doVecInsertOp Nothing vec2b64 v e i res 
    616616 
    617617-- Prefetch 
    618 emitPrimOp _ res PrefetchByteArrayOp        args = doPrefetchByteArrayOp res args 
    619 emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args 
    620 emitPrimOp _ res PrefetchAddrOp             args = doPrefetchAddrOp res args 
     618emitPrimOp _ res PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3 res args 
     619emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args 
     620emitPrimOp _ res PrefetchAddrOp3             args = doPrefetchAddrOp  3 res args 
     621 
     622emitPrimOp _ res PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2 res args 
     623emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args 
     624emitPrimOp _ res PrefetchAddrOp2             args = doPrefetchAddrOp 2 res args 
     625 
     626emitPrimOp _ res PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1 res args 
     627emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args 
     628emitPrimOp _ res PrefetchAddrOp1             args = doPrefetchAddrOp 1 res args 
     629 
     630emitPrimOp _ res PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0 res args 
     631emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args 
     632emitPrimOp _ res PrefetchAddrOp0             args = doPrefetchAddrOp 0 res args 
     633 
    621634 
    622635-- The rest just translate straightforwardly 
    623636emitPrimOp dflags [res] op [arg] 
    doVecInsertOp maybe_pre_write_cast ty src e idx res = do 
    12721285------------------------------------------------------------------------------ 
    12731286-- Helpers for translating prefetching. 
    12741287 
    1275 doPrefetchByteArrayOp :: [LocalReg] 
     1288doPrefetchByteArrayOp :: Int -> [LocalReg] 
    12761289                      -> [CmmExpr] 
    12771290                      -> FCode () 
    1278 doPrefetchByteArrayOp res [addr,idx] 
     1291doPrefetchByteArrayOp locality res [addr,idx] 
    12791292   = do dflags <- getDynFlags 
    1280         mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx 
    1281 doPrefetchByteArrayOp _ _ 
     1293        mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx 
     1294doPrefetchByteArrayOp _ _ _ 
    12821295   = panic "StgCmmPrim: doPrefetchByteArrayOp" 
    12831296 
    1284 doPrefetchAddrOp :: [LocalReg] 
     1297doPrefetchAddrOp ::Int  -> [LocalReg] 
    12851298                 -> [CmmExpr] 
    12861299                 -> FCode () 
    1287 doPrefetchAddrOp res [addr,idx] 
    1288    = mkBasicPrefetch 0 res addr idx 
    1289 doPrefetchAddrOp _ _ 
     1300doPrefetchAddrOp locality  res [addr,idx] 
     1301   = mkBasicPrefetch locality 0 res addr idx 
     1302doPrefetchAddrOp _ _  _ 
    12901303   = panic "StgCmmPrim: doPrefetchAddrOp" 
    12911304 
    1292 mkBasicPrefetch :: ByteOff      -- Initial offset in bytes 
     1305mkBasicPrefetch :: Int ->  ByteOff      -- Initial offset in bytes 
    12931306                -> [LocalReg]   -- Destination 
    12941307                -> CmmExpr      -- Base address 
    12951308                -> CmmExpr      -- Index 
    12961309                -> FCode () 
    1297 mkBasicPrefetch off res base idx 
     1310mkBasicPrefetch locality off res base idx 
    12981311   = do dflags <- getDynFlags 
    1299         emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] 
     1312        emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] 
    13001313        case res of 
    13011314          []    -> return () 
    13021315          [reg] -> emitAssign (CmmLocal reg) base 
  • compiler/llvmGen/LlvmCodeGen/CodeGen.hs

    diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
    index def9e2b..ad0db86 100644
    a b genCall (PrimTarget (MO_UF_Conv _)) [_] args = 
    200200    "Can only handle 1, given" ++ show (length args) ++ "." 
    201201 
    202202-- Handle prefetching data 
    203 genCall t@(PrimTarget MO_Prefetch_Data) [] args = do 
     203genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args  
     204  | 0 <= localityInt && localityInt <= 3 = do 
    204205    ver <- getLlvmVer 
    205206    let argTy | ver <= 29  = [i8Ptr, i32, i32] 
    206207              | otherwise  = [i8Ptr, i32, i32, i32] 
    genCall t@(PrimTarget MO_Prefetch_Data) [] args = do 
    214215    (argVars', stmts3)      <- castVars $ zip argVars argTy 
    215216 
    216217    trash <- getTrashStmts 
    217     let argSuffix | ver <= 29  = [mkIntLit i32 0, mkIntLit i32 3] 
    218                   | otherwise  = [mkIntLit i32 0, mkIntLit i32 3, mkIntLit i32 1] 
     218    let argSuffix | ver <= 29  = [mkIntLit i32 0, mkIntLit i32 localityInt] 
     219                  | otherwise  = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1] 
    219220        call = Expr $ Call StdCall fptr (argVars' ++ argSuffix) [] 
    220221        stmts = stmts1 `appOL` stmts2 `appOL` stmts3 
    221222                `appOL` trash `snocOL` call 
    222223    return (stmts, top1 ++ top2) 
     224  | otherwise = panic $ "prefetch locality level integermust be between 0 and 3, given: " ++ (show localityInt)  
    223225 
    224226-- Handle PopCnt and BSwap that need to only convert arg and return types 
    225227genCall t@(PrimTarget (MO_PopCnt w)) dsts args = 
    cmmPrimOpFunctions mop = do 
    545547    (MO_PopCnt w) -> fsLit $ "llvm.ctpop."  ++ showSDoc dflags (ppr $ widthToLlvmInt w) 
    546548    (MO_BSwap w)  -> fsLit $ "llvm.bswap."  ++ showSDoc dflags (ppr $ widthToLlvmInt w) 
    547549 
    548     MO_Prefetch_Data -> fsLit "llvm.prefetch" 
     550    (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch" 
    549551 
     552 
     553    -- aren't these supported by llvm? 
    550554    MO_S_QuotRem {}  -> unsupported 
    551555    MO_U_QuotRem {}  -> unsupported 
    552556    MO_U_QuotRem2 {} -> unsupported 
  • compiler/nativeGen/PPC/CodeGen.hs

    diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
    index 65533d8..f018276 100644
    a b genCCall' dflags gcp target dest_regs args0 
    11651165                    MO_U_Mul2 {}     -> unsupported 
    11661166                    MO_WriteBarrier  -> unsupported 
    11671167                    MO_Touch         -> unsupported 
    1168                     MO_Prefetch_Data -> unsupported 
     1168                    (MO_Prefetch_Data _ ) -> unsupported 
    11691169                unsupported = panic ("outOfLineCmmOp: " ++ show mop 
    11701170                                  ++ " not supported") 
    11711171 
  • compiler/nativeGen/SPARC/CodeGen.hs

    diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
    index 5d2b9a9..4f670f4 100644
    a b outOfLineMachOp_table mop 
    657657        MO_U_Mul2 {}     -> unsupported 
    658658        MO_WriteBarrier  -> unsupported 
    659659        MO_Touch         -> unsupported 
    660         MO_Prefetch_Data -> unsupported 
     660        (MO_Prefetch_Data _) -> unsupported 
    661661    where unsupported = panic ("outOfLineCmmOp: " ++ show mop 
    662662                            ++ " not supported here") 
    663663 
  • compiler/nativeGen/X86/CodeGen.hs

    diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
    index f6143d3..1b910e5 100644
    a b genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL 
    16561656 
    16571657genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL 
    16581658 
    1659 genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL 
     1659genCCall _ (PrimTarget (MO_Prefetch_Data _ )) _ _ = return nilOL 
     1660-- could easily add prefetch to x86 that exactly reflects 
     1661--- prefetches 0-3, not doing that for now 
    16601662 
    16611663genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do 
    16621664    dflags <- getDynFlags 
    outOfLineCmmOp mop res args 
    23592361              MO_U_Mul2 {}     -> unsupported 
    23602362              MO_WriteBarrier  -> unsupported 
    23612363              MO_Touch         -> unsupported 
    2362               MO_Prefetch_Data -> unsupported 
     2364              (MO_Prefetch_Data _ ) -> unsupported 
    23632365        unsupported = panic ("outOfLineCmmOp: " ++ show mop 
    23642366                          ++ " not supported here") 
    23652367 
  • compiler/prelude/primops.txt.pp

    diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
    index 094c2f5..ed9d5ff 100644
    a b primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp 
    27572757    
    27582758------------------------------------------------------------------------ 
    27592759section "Prefetch"  
    2760         {Prefetch operations} 
     2760        {Prefetch operations: Note how there are locality levels 0-3. 
     2761  On x86_64 systems, 0 will correspond with a Nontemporal (NTA)  prefetch, 
     2762  which is not persisted in any level of cache. NTA prefetch is suitable for  
     2763  streaming  single pass workloads, and perhaps certain workloads larger 
     2764  than a CPU's largest cache.  
     2765   
     2766  Higher locality numbers correspond to increasing temporal locality, i.e. data that should be kept in cache; 3 is the most local.} 
    27612767------------------------------------------------------------------------ 
    27622768 
    2763 primop PrefetchByteArrayOp "prefetchByteArray#" GenPrimOp 
     2769---- 
     2770--- the Int# argument for prefetch is the byte offset on the byteArray or  Addr# 
     2771 
     2772--- 
     2773primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp 
     2774   ByteArray# -> Int# -> ByteArray# 
     2775   with llvm_only = True 
     2776 
     2777primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp 
     2778   MutableByteArray# s -> Int# -> State# s -> State# s 
     2779   with has_side_effects = True 
     2780        llvm_only = True 
     2781 
     2782primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp 
     2783    Addr# -> Int# -> Addr# 
     2784    with llvm_only = True 
     2785 
     2786---- 
     2787 
     2788primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp 
     2789   ByteArray# -> Int# -> ByteArray# 
     2790   with llvm_only = True 
     2791 
     2792primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp 
     2793   MutableByteArray# s -> Int# -> State# s -> State# s 
     2794   with has_side_effects = True 
     2795        llvm_only = True 
     2796 
     2797primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp 
     2798   Addr# -> Int# -> Addr# 
     2799   with llvm_only = True 
     2800 
     2801---- 
     2802 
     2803primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp 
    27642804   ByteArray# -> Int# -> ByteArray# 
    27652805   with llvm_only = True 
    27662806 
    2767 primop PrefetchMutableByteArrayOp "prefetchMutableByteArray#" GenPrimOp 
     2807primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp 
    27682808   MutableByteArray# s -> Int# -> State# s -> State# s 
    27692809   with has_side_effects = True 
    27702810        llvm_only = True 
    27712811 
    2772 primop PrefetchAddrOp "prefetchAddr#" GenPrimOp 
     2812primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp 
    27732813   Addr# -> Int# -> Addr# 
    27742814   with llvm_only = True 
    27752815 
     2816---- 
     2817 
     2818primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp 
     2819   ByteArray# -> Int# -> ByteArray# 
     2820   with llvm_only = True 
     2821 
     2822primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp 
     2823   MutableByteArray# s -> Int# -> State# s -> State# s 
     2824   with has_side_effects = True 
     2825        llvm_only = True 
     2826 
     2827primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp 
     2828   Addr# -> Int# -> Addr# 
     2829   with llvm_only = True 
     2830 
     2831 
     2832 
    27762833------------------------------------------------------------------------ 
    27772834---                                                                  --- 
    27782835------------------------------------------------------------------------