Ticket #8256: patch-prefetch-squash1.diff

File patch-prefetch-squash1.diff, 15.1 KB (added by carter, 2 years ago)

updated diff / patch

  • compiler/codeGen/StgCmmPrim.hs

    From 7c4f5b6967f795ec1e0d6036d277fc3b4f6a095f Mon Sep 17 00:00:00 2001
    From: Carter Tazio Schonwald <[email protected]>
    Date: Sun, 8 Sep 2013 21:42:36 -0400
    Subject: [PATCH] removing redundant prefetch case
    
    ---
     compiler/codeGen/StgCmmPrim.hs | 9 ---------
     1 file changed, 9 deletions(-)
    
    diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
    index bb58024..e63378a 100644
    a b emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg] 
    255255emitPrimOp _ res@[] TouchOp args@[_arg]
    256256   = do emitPrimCall res MO_Touch args
    257257
    258 emitPrimOp _ res@[] PrefetchByteArrayOp args@[_arg]
    259    = do emitPrimCall res MO_Prefetch_Data args
    260 
    261 emitPrimOp _ res@[] PrefetchMutableByteArrayOp args@[_arg]
    262    = do emitPrimCall res MO_Prefetch_Data args
    263 
    264 emitPrimOp _ res@[] PrefetchAddrOp args@[_arg]
    265    = do emitPrimCall res MO_Prefetch_Data args
    266 
    267258--  #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
    268259emitPrimOp dflags [res] ByteArrayContents_Char [arg]
    269260   = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
  • compiler/cmm/CmmMachOp.hs

    -- 
    1.8.2
    
    From 1b07948b50b64dc40413fc16b396d8f98bcc6188 Mon Sep 17 00:00:00 2001
    From: Carter Tazio Schonwald <[email protected]>
    Date: Sun, 8 Sep 2013 21:58:25 -0400
    Subject: [PATCH] some other fixups for adding prefetch I overlooked.
    
    note: should also change the native code gens to support prefetch. should be easy!
    (not doing it for now, but should do it this week) (+1 squashed commit)
    Squashed commits:
    [2a433e2] a first pass at all the machinery for supporting all the different read prefetches, need to test if it builds :)
    ---
     compiler/cmm/CmmMachOp.hs               |  8 +++-
     compiler/cmm/CmmParse.y                 |  9 ++++-
     compiler/cmm/PprC.hs                    |  4 +-
     compiler/codeGen/StgCmmPrim.hs          | 41 ++++++++++++++-------
     compiler/llvmGen/LlvmCodeGen/CodeGen.hs | 12 ++++--
     compiler/nativeGen/PPC/CodeGen.hs       |  2 +-
     compiler/nativeGen/SPARC/CodeGen.hs     |  2 +-
     compiler/nativeGen/X86/CodeGen.hs       |  6 ++-
     compiler/prelude/primops.txt.pp         | 65 +++++++++++++++++++++++++++++++--
     9 files changed, 120 insertions(+), 29 deletions(-)
    
    diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
    index 8d42bbd..a398336 100644
    a b data CallishMachOp 
    518518  | MO_Touch         -- Keep variables live (when using interior pointers)
    519519
    520520  -- Prefetch
    521   | MO_Prefetch_Data -- Prefetch hint. May change program performance but not
     521  | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not
    522522                     -- program behavior.
     523                     -- the Int can be 0-3. Needs to be known at compile time
     524                     -- to interact with code generation correctly.
     525                     --  TODO: add support for prefetch WRITES,
     526                     --  currently only exposes prefetch reads, which
     527                     -- would be the majority of use cases in ghc anyways
     528
    523529
    524530  -- Note that these three MachOps all take 1 extra parameter than the
    525531  -- standard C lib versions. The extra (last) parameter contains
  • compiler/cmm/CmmParse.y

    diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
    index 8c36dea..830cb92 100644
    a b callishMachOps = listToUFM $ 
    923923        ( "write_barrier", MO_WriteBarrier ),
    924924        ( "memcpy", MO_Memcpy ),
    925925        ( "memset", MO_Memset ),
    926         ( "memmove", MO_Memmove )
     926        ( "memmove", MO_Memmove ),
     927
     928        ("prefetch0",MO_Prefetch_Data 0),
     929        ("prefetch1",MO_Prefetch_Data 1),
     930        ("prefetch2",MO_Prefetch_Data 2),
     931        ("prefetch3",MO_Prefetch_Data 3)
     932
    927933        -- ToDo: the rest, maybe
     934        -- edit; which rest?
    928935    ]
    929936
    930937parseSafety :: String -> P Safety
  • compiler/cmm/PprC.hs

    diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
    index 149968d..8a446d1 100644
    a b pprCallishMachOp_for_C mop 
    748748        MO_Add2       {} -> unsupported
    749749        MO_U_Mul2     {} -> unsupported
    750750        MO_Touch         -> unsupported
    751         MO_Prefetch_Data -> unsupported
     751        (MO_Prefetch_Data _ ) -> unsupported
     752        --- we COULD support prefetch via "__builtin_prefetch"
     753        --- Not adding it for now
    752754    where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop
    753755                            ++ " not supported!")
    754756
  • compiler/codeGen/StgCmmPrim.hs

    diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
    index e63378a..cf53ed4 100644
    a b emitPrimOp _ [res] Int64X2InsertOp [v,e,i] = 
    615615    doVecInsertOp Nothing vec2b64 v e i res
    616616
    617617-- Prefetch
    618 emitPrimOp _ res PrefetchByteArrayOp        args = doPrefetchByteArrayOp res args
    619 emitPrimOp _ res PrefetchMutableByteArrayOp args = doPrefetchByteArrayOp res args
    620 emitPrimOp _ res PrefetchAddrOp             args = doPrefetchAddrOp res args
     618emitPrimOp _ res PrefetchByteArrayOp3        args = doPrefetchByteArrayOp 3 res args
     619emitPrimOp _ res PrefetchMutableByteArrayOp3 args = doPrefetchByteArrayOp 3 res args
     620emitPrimOp _ res PrefetchAddrOp3             args = doPrefetchAddrOp  3 res args
     621
     622emitPrimOp _ res PrefetchByteArrayOp2        args = doPrefetchByteArrayOp 2 res args
     623emitPrimOp _ res PrefetchMutableByteArrayOp2 args = doPrefetchByteArrayOp 2 res args
     624emitPrimOp _ res PrefetchAddrOp2             args = doPrefetchAddrOp 2 res args
     625
     626emitPrimOp _ res PrefetchByteArrayOp1        args = doPrefetchByteArrayOp 1 res args
     627emitPrimOp _ res PrefetchMutableByteArrayOp1 args = doPrefetchByteArrayOp 1 res args
     628emitPrimOp _ res PrefetchAddrOp1             args = doPrefetchAddrOp 1 res args
     629
     630emitPrimOp _ res PrefetchByteArrayOp0        args = doPrefetchByteArrayOp 0 res args
     631emitPrimOp _ res PrefetchMutableByteArrayOp0 args = doPrefetchByteArrayOp 0 res args
     632emitPrimOp _ res PrefetchAddrOp0             args = doPrefetchAddrOp 0 res args
     633
    621634
    622635-- The rest just translate straightforwardly
    623636emitPrimOp dflags [res] op [arg]
    doVecInsertOp maybe_pre_write_cast ty src e idx res = do 
    12721285------------------------------------------------------------------------------
    12731286-- Helpers for translating prefetching.
    12741287
    1275 doPrefetchByteArrayOp :: [LocalReg]
     1288doPrefetchByteArrayOp :: Int -> [LocalReg]
    12761289                      -> [CmmExpr]
    12771290                      -> FCode ()
    1278 doPrefetchByteArrayOp res [addr,idx]
     1291doPrefetchByteArrayOp locality res [addr,idx]
    12791292   = do dflags <- getDynFlags
    1280         mkBasicPrefetch (arrWordsHdrSize dflags) res addr idx
    1281 doPrefetchByteArrayOp _ _
     1293        mkBasicPrefetch locality (arrWordsHdrSize dflags) res addr idx
     1294doPrefetchByteArrayOp _ _ _
    12821295   = panic "StgCmmPrim: doPrefetchByteArrayOp"
    12831296
    1284 doPrefetchAddrOp :: [LocalReg]
     1297doPrefetchAddrOp ::Int  -> [LocalReg]
    12851298                 -> [CmmExpr]
    12861299                 -> FCode ()
    1287 doPrefetchAddrOp res [addr,idx]
    1288    = mkBasicPrefetch 0 res addr idx
    1289 doPrefetchAddrOp _ _
     1300doPrefetchAddrOp locality  res [addr,idx]
     1301   = mkBasicPrefetch locality 0 res addr idx
     1302doPrefetchAddrOp _ _  _
    12901303   = panic "StgCmmPrim: doPrefetchAddrOp"
    12911304
    1292 mkBasicPrefetch :: ByteOff      -- Initial offset in bytes
     1305mkBasicPrefetch :: Int ->  ByteOff      -- Initial offset in bytes
    12931306                -> [LocalReg]   -- Destination
    12941307                -> CmmExpr      -- Base address
    12951308                -> CmmExpr      -- Index
    12961309                -> FCode ()
    1297 mkBasicPrefetch off res base idx
     1310mkBasicPrefetch locality off res base idx
    12981311   = do dflags <- getDynFlags
    1299         emitPrimCall [] MO_Prefetch_Data [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
     1312        emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
    13001313        case res of
    13011314          []    -> return ()
    13021315          [reg] -> emitAssign (CmmLocal reg) base
  • compiler/llvmGen/LlvmCodeGen/CodeGen.hs

    diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
    index def9e2b..ad0db86 100644
    a b genCall (PrimTarget (MO_UF_Conv _)) [_] args = 
    200200    "Can only handle 1, given" ++ show (length args) ++ "."
    201201
    202202-- Handle prefetching data
    203 genCall t@(PrimTarget MO_Prefetch_Data) [] args = do
     203genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
     204  | 0 <= localityInt && localityInt <= 3 = do
    204205    ver <- getLlvmVer
    205206    let argTy | ver <= 29  = [i8Ptr, i32, i32]
    206207              | otherwise  = [i8Ptr, i32, i32, i32]
    genCall t@(PrimTarget MO_Prefetch_Data) [] args = do 
    214215    (argVars', stmts3)      <- castVars $ zip argVars argTy
    215216
    216217    trash <- getTrashStmts
    217     let argSuffix | ver <= 29  = [mkIntLit i32 0, mkIntLit i32 3]
    218                   | otherwise  = [mkIntLit i32 0, mkIntLit i32 3, mkIntLit i32 1]
     218    let argSuffix | ver <= 29  = [mkIntLit i32 0, mkIntLit i32 localityInt]
     219                  | otherwise  = [mkIntLit i32 0, mkIntLit i32 localityInt, mkIntLit i32 1]
    219220        call = Expr $ Call StdCall fptr (argVars' ++ argSuffix) []
    220221        stmts = stmts1 `appOL` stmts2 `appOL` stmts3
    221222                `appOL` trash `snocOL` call
    222223    return (stmts, top1 ++ top2)
     224  | otherwise = panic $ "prefetch locality level integermust be between 0 and 3, given: " ++ (show localityInt)
    223225
    224226-- Handle PopCnt and BSwap that need to only convert arg and return types
    225227genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
    cmmPrimOpFunctions mop = do 
    545547    (MO_PopCnt w) -> fsLit $ "llvm.ctpop."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    546548    (MO_BSwap w)  -> fsLit $ "llvm.bswap."  ++ showSDoc dflags (ppr $ widthToLlvmInt w)
    547549
    548     MO_Prefetch_Data -> fsLit "llvm.prefetch"
     550    (MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
    549551
     552
     553    -- aren't these supported by llvm?
    550554    MO_S_QuotRem {}  -> unsupported
    551555    MO_U_QuotRem {}  -> unsupported
    552556    MO_U_QuotRem2 {} -> unsupported
  • compiler/nativeGen/PPC/CodeGen.hs

    diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
    index 65533d8..f018276 100644
    a b genCCall' dflags gcp target dest_regs args0 
    11651165                    MO_U_Mul2 {}     -> unsupported
    11661166                    MO_WriteBarrier  -> unsupported
    11671167                    MO_Touch         -> unsupported
    1168                     MO_Prefetch_Data -> unsupported
     1168                    (MO_Prefetch_Data _ ) -> unsupported
    11691169                unsupported = panic ("outOfLineCmmOp: " ++ show mop
    11701170                                  ++ " not supported")
    11711171
  • compiler/nativeGen/SPARC/CodeGen.hs

    diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
    index 5d2b9a9..4f670f4 100644
    a b outOfLineMachOp_table mop 
    657657        MO_U_Mul2 {}     -> unsupported
    658658        MO_WriteBarrier  -> unsupported
    659659        MO_Touch         -> unsupported
    660         MO_Prefetch_Data -> unsupported
     660        (MO_Prefetch_Data _) -> unsupported
    661661    where unsupported = panic ("outOfLineCmmOp: " ++ show mop
    662662                            ++ " not supported here")
    663663
  • compiler/nativeGen/X86/CodeGen.hs

    diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
    index f6143d3..1b910e5 100644
    a b genCCall _ (PrimTarget MO_WriteBarrier) _ _ = return nilOL 
    16561656
    16571657genCCall _ (PrimTarget MO_Touch) _ _ = return nilOL
    16581658
    1659 genCCall _ (PrimTarget MO_Prefetch_Data) _ _ = return nilOL
     1659genCCall _ (PrimTarget (MO_Prefetch_Data _ )) _ _ = return nilOL
     1660-- could easily add prefetch to x86 that exactly reflects
     1661--- prefetches 0-3, not doing that for now
    16601662
    16611663genCCall is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] = do
    16621664    dflags <- getDynFlags
    outOfLineCmmOp mop res args 
    23592361              MO_U_Mul2 {}     -> unsupported
    23602362              MO_WriteBarrier  -> unsupported
    23612363              MO_Touch         -> unsupported
    2362               MO_Prefetch_Data -> unsupported
     2364              (MO_Prefetch_Data _ ) -> unsupported
    23632365        unsupported = panic ("outOfLineCmmOp: " ++ show mop
    23642366                          ++ " not supported here")
    23652367
  • compiler/prelude/primops.txt.pp

    diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
    index 094c2f5..ed9d5ff 100644
    a b primop WriteOffAddrOp_Int64AsInt64X2 "writeInt64OffAddrAsInt64X2#" GenPrimOp 
    27572757   
    27582758------------------------------------------------------------------------
    27592759section "Prefetch"
    2760         {Prefetch operations}
     2760        {Prefetch operations: Note how there are locality levels 0-3.
     2761  On x86_64 systems, 0 will correspond with a Nontemporal (NTA)  prefetch,
     2762  which is not persisted in any level of cache. NTA prefetch is suitable for
     2763  streaming, single-pass workloads, and perhaps certain workloads larger
     2764  than a CPU's largest cache.
     2765 
     2766  Higher locality numbers correspond to the data being kept in more levels of the CPU cache.}
    27612767------------------------------------------------------------------------
    27622768
    2763 primop PrefetchByteArrayOp "prefetchByteArray#" GenPrimOp
     2769----
     2770--- The Int# argument for prefetch is the byte offset into the ByteArray# or Addr#
     2771
     2772---
     2773primop PrefetchByteArrayOp3 "prefetchByteArray3#" GenPrimOp
     2774   ByteArray# -> Int# -> ByteArray#
     2775   with llvm_only = True
     2776
     2777primop PrefetchMutableByteArrayOp3 "prefetchMutableByteArray3#" GenPrimOp
     2778   MutableByteArray# s -> Int# -> State# s -> State# s
     2779   with has_side_effects = True
     2780        llvm_only = True
     2781
     2782primop PrefetchAddrOp3 "prefetchAddr3#" GenPrimOp
     2783    Addr# -> Int# -> Addr#
     2784    with llvm_only = True
     2785
     2786----
     2787
     2788primop PrefetchByteArrayOp2 "prefetchByteArray2#" GenPrimOp
     2789   ByteArray# -> Int# -> ByteArray#
     2790   with llvm_only = True
     2791
     2792primop PrefetchMutableByteArrayOp2 "prefetchMutableByteArray2#" GenPrimOp
     2793   MutableByteArray# s -> Int# -> State# s -> State# s
     2794   with has_side_effects = True
     2795        llvm_only = True
     2796
     2797primop PrefetchAddrOp2 "prefetchAddr2#" GenPrimOp
     2798   Addr# -> Int# -> Addr#
     2799   with llvm_only = True
     2800
     2801----
     2802
     2803primop PrefetchByteArrayOp1 "prefetchByteArray1#" GenPrimOp
    27642804   ByteArray# -> Int# -> ByteArray#
    27652805   with llvm_only = True
    27662806
    2767 primop PrefetchMutableByteArrayOp "prefetchMutableByteArray#" GenPrimOp
     2807primop PrefetchMutableByteArrayOp1 "prefetchMutableByteArray1#" GenPrimOp
    27682808   MutableByteArray# s -> Int# -> State# s -> State# s
    27692809   with has_side_effects = True
    27702810        llvm_only = True
    27712811
    2772 primop PrefetchAddrOp "prefetchAddr#" GenPrimOp
     2812primop PrefetchAddrOp1 "prefetchAddr1#" GenPrimOp
    27732813   Addr# -> Int# -> Addr#
    27742814   with llvm_only = True
    27752815
     2816----
     2817
     2818primop PrefetchByteArrayOp0 "prefetchByteArray0#" GenPrimOp
     2819   ByteArray# -> Int# -> ByteArray#
     2820   with llvm_only = True
     2821
     2822primop PrefetchMutableByteArrayOp0 "prefetchMutableByteArray0#" GenPrimOp
     2823   MutableByteArray# s -> Int# -> State# s -> State# s
     2824   with has_side_effects = True
     2825        llvm_only = True
     2826
     2827primop PrefetchAddrOp0 "prefetchAddr0#" GenPrimOp
     2828   Addr# -> Int# -> Addr#
     2829   with llvm_only = True
     2830
     2831
     2832
    27762833------------------------------------------------------------------------
    27772834---                                                                  ---
    27782835------------------------------------------------------------------------