Ticket #1498: 0001-Horrible-hacky-implementation-of-avoiding-heap-check.patch

File 0001-Horrible-hacky-implementation-of-avoiding-heap-check.patch, 13.2 KB (added by batterseapower, 3 years ago)
  • compiler/codeGen/StgCmmExpr.hs

    From 9c0e4c3de27ae6d10f7500395cdbeacd192bc5b7 Mon Sep 17 00:00:00 2001
    From: Max Bolingbroke <[email protected]>
    Date: Wed, 22 Feb 2012 15:33:36 +0000
    Subject: [PATCH] Horrible hacky implementation of avoiding heap check in
     certain primop cases
    
    ---
     compiler/codeGen/StgCmmExpr.hs  |   34 ++++++++++------
     compiler/codeGen/StgCmmHeap.hs  |   79 +++++++++++++++++++++------------------
     compiler/codeGen/StgCmmMonad.hs |   17 ++++++++-
     compiler/codeGen/StgCmmPrim.hs  |    5 ++-
     4 files changed, 84 insertions(+), 51 deletions(-)
    
    diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
    index ccc9e6b..d5d92c3 100644
    a b cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = 
    6767cgExpr (StgOpApp op args ty) = cgOpApp op args ty 
    6868cgExpr (StgConApp con args)  = cgConApp con args 
    6969cgExpr (StgSCC cc tick push expr) = do { emitSetCCC cc tick push; cgExpr expr } 
    70 cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } 
     70cgExpr (StgTick m n expr) = do { setLastGC Nothing; emit (mkTickBox m n); cgExpr expr } -- NB: use setLastGC to avoid duplicating tick 
    7171cgExpr (StgLit lit)       = do cmm_lit <- cgLit lit 
    7272                               emitReturn [CmmLit cmm_lit] 
    7373 
    data GcPlan 
    282282  | NoGcInAlts          -- The scrutinee is a primitive value, or a call to a 
    283283                        -- primitive op which does no GC.  Absorb the allocation 
    284284                        -- of the case alternative(s) into the upstream check 
     285        (Maybe (Bool -> VirtualHpOffset -> FCode ())) -- If Just, *do* make a GC check in the alts, but return to a completly different proc point via this code 
    285286 
    286287------------------------------------- 
    287288cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () 
    288289 
    289 cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts 
     290cgCase e@(StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts 
    290291  | isEnumerationTyCon tycon -- Note [case on bool] 
    291   = do { tag_expr <- do_enum_primop op args 
     292  = do { good_to_delay <- goodToHeapCheckInAlts 
     293       ; tag_expr <- pprTrace "Special case" (ppr e) $ do_enum_primop op args 
    292294 
    293295       -- If the binder is not dead, convert the tag to a constructor 
    294296       -- and assign it. 
    cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts 
    297299            ; emitAssign (CmmLocal tmp_reg) 
    298300                         (tagToClosure tycon tag_expr) } 
    299301 
    300        ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts (NonVoid bndr) alts 
     302       ; mb_gc <- getLastGC 
     303       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts (guard good_to_delay >> mb_gc)) (NonVoid bndr) alts 
    301304       ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) 
    302305       } 
    303306  where 
    cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts 
    369372       ; v_info <- getCgIdInfo v 
    370373       ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info) 
    371374       ; _ <- bindArgsToRegs [NonVoid bndr] 
    372        ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } 
     375       ; cgAlts (NoGcInAlts Nothing) (NonVoid bndr) alt_type alts } -- TODO: might be useful to say Just if more than 1 alt 
    373376  where 
    374377    reps_compatible = idPrimRep v == idPrimRep bndr 
    375378 
    cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts 
    402405 
    403406cgCase scrut bndr srt alt_type alts  
    404407  = -- the general case 
    405     do { up_hp_usg <- getVirtHp        -- Upstream heap usage 
     408    do { good_to_delay <- goodToHeapCheckInAlts 
    406409       ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts 
    407410             alt_regs  = map idToReg ret_bndrs 
    408411             simple_scrut = isSimpleScrut scrut alt_type 
    409              gcInAlts | not simple_scrut = True 
    410                       | isSingleton alts = False 
    411                       | up_hp_usg > 0    = False 
    412                       | otherwise        = True 
    413              gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts 
     412             gcInAlts | not simple_scrut  = True 
     413                      | not good_to_delay = False 
     414                      | otherwise         = True 
     415             gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts Nothing -- NB: never useful to say just because the alts get a procpoint anyway 
    414416 
    415417       ; mb_cc <- maybeSaveCostCentre simple_scrut 
    416418       ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) 
    cgCase scrut bndr srt alt_type alts 
    420422       ; _ <- bindArgsToRegs ret_bndrs 
    421423       ; cgAlts gc_plan (NonVoid bndr) alt_type alts } 
    422424 
     425goodToHeapCheckInAlts :: [StgAlt] -> FCode Bool 
     426goodToHeapCheckInAlts alts = do 
     427    up_hp_usg <- getVirtHp -- Upstream heap usage 
     428    return (not (isSingleton alts || up_hp_usg > 0)) 
     429 
    423430----------------- 
    424431maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) 
    425432maybeSaveCostCentre simple_scrut 
    cgAltRhss gc_plan bndr alts 
    547554           ; return con } 
    548555 
    549556maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a 
    550 maybeAltHeapCheck NoGcInAlts        code = code 
    551 maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code 
     557maybeAltHeapCheck (NoGcInAlts (Just gc)) code = pprTrace "NoGcInAlts Just" empty $ heapCheck False gc code -- GC, but if it fails return to an earlier point in the code (e.g. before evaluating scrut) 
     558maybeAltHeapCheck (NoGcInAlts Nothing)   code = code 
     559maybeAltHeapCheck (GcInAlts regs _)      code = pprTrace "yeah, altHeapCheck" (ppr regs) $ altHeapCheck regs code 
    552560 
    553561----------------------------------------------------------------------------- 
    554562--      Tail calls 
  • compiler/codeGen/StgCmmHeap.hs

    diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
    index 6533414..38ab8c4 100644
    a b module StgCmmHeap ( 
    1010        getVirtHp, setVirtHp, setRealHp, 
    1111        getHpRelOffset, hpRel, 
    1212 
    13         entryHeapCheck, altHeapCheck, 
     13        entryHeapCheck, altHeapCheck, heapCheck, 
    1414 
    1515        mkVirtHeapOffsets, mkVirtConstrOffsets, 
    1616        mkStaticClosureFields, mkStaticClosure, 
    entryHeapCheck cl_info offset nodeSet arity args code 
    361361            - GC calls, but until then this fishy code works -} 
    362362 
    363363       updfr_sz <- getUpdFrameOff 
    364        heapCheck True (gc_call updfr_sz) code 
     364       do_checks <- mk_gc_checks (return (gc_call updfr_sz)) 
     365       heapCheck True do_checks code 
    365366 
    366367{- 
    367368    -- This code is slightly outdated now and we could easily keep the above 
    entryHeapCheck cl_info offset nodeSet arity args code 
    413414altHeapCheck :: [LocalReg] -> FCode a -> FCode a 
    414415altHeapCheck regs code 
    415416  = do updfr_sz <- getUpdFrameOff 
    416        gc_call_code <- gc_call updfr_sz 
    417        heapCheck False gc_call_code code 
     417       do_checks <- mk_gc_checks (gc_call updfr_sz) 
     418       heapCheck False do_checks code 
    418419 
    419420  where 
    420421    reg_exprs = map (CmmReg . CmmLocal) regs 
    mkGcLabel :: String -> CmmLit 
    450451mkGcLabel = (CmmLabel . (mkCmmCodeLabel rtsPackageId) . fsLit) 
    451452 
    452453------------------------------- 
    453 heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a 
    454 heapCheck checkStack do_gc code 
     454heapCheck :: Bool -> (Bool -> WordOff -> FCode ()) -> FCode a -> FCode a 
     455heapCheck checkStack do_checks code 
    455456  = getHeapUsage $ \ hpHw -> 
    456457    -- Emit heap checks, but be sure to do it lazily so 
    457458    -- that the conditionals on hpHw don't cause a black hole 
    458     do  { codeOnly $ do_checks checkStack hpHw do_gc 
     459    do  { codeOnly $ do_checks checkStack hpHw 
    459460        ; tickyAllocHeap hpHw 
    460461        ; doGranAllocate hpHw 
    461462        ; setRealHp hpHw 
    462463        ; code } 
    463464 
    464 do_checks :: Bool       -- Should we check the stack? 
    465           -> WordOff    -- Heap headroom 
    466           -> CmmAGraph  -- What to do on failure 
    467           -> FCode () 
    468 do_checks checkStack alloc do_gc = do 
     465mk_gc_checks :: FCode CmmAGraph   -- Build code block handling what to do on failure 
     466             -> FCode (Bool       -- Should we check the stack? 
     467                    -> WordOff    -- Heap headroom 
     468                    -> FCode ()) 
     469mk_gc_checks mk_do_gc = do 
     470  -- Label to loop back to is shared between all instances of the call to the GC function 
    469471  loop_id <- newLabelC 
    470   gc_id <- newLabelC 
    471472  emitLabel loop_id 
    472   hp_check <- if alloc == 0 
    473                  then return mkNop 
    474                  else do 
    475                    ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) 
    476                    return (mkAssign hpReg bump_hp <*> ifthen) 
    477  
    478   if checkStack 
    479      then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check 
    480      else emit hp_check 
    481  
    482   emit $ mkComment (mkFastString "outOfLine should follow:") 
    483  
    484   emitOutOfLine gc_id $ 
    485      mkComment (mkFastString "outOfLine here") <*> 
    486      do_gc <*> 
    487      mkBranch loop_id 
    488                 -- Test for stack pointer exhaustion, then 
    489                 -- bump heap pointer, and test for heap exhaustion 
    490                 -- Note that we don't move the heap pointer unless the 
    491                 -- stack check succeeds.  Otherwise we might end up 
    492                 -- with slop at the end of the current block, which can 
    493                 -- confuse the LDV profiler. 
    494   where 
     473  setLastGC (Just (do_checks loop_id)) -- Store an *action* so that if we reuse the code we don't reuse any labels 
     474  return (do_checks loop_id) 
     475 where 
     476  do_checks loop_id checkStack alloc = do 
     477        do_gc <- mk_do_gc 
     478        gc_id <- newLabelC 
     479        hp_check <- if alloc == 0 
     480                       then return mkNop 
     481                       else do 
     482                         ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) 
     483                         return (mkAssign hpReg bump_hp <*> ifthen) 
     484       
     485        if checkStack 
     486           then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check 
     487           else emit hp_check 
     488       
     489        emit $ mkComment (mkFastString "outOfLine should follow:") 
     490       
     491        emitOutOfLine gc_id $ 
     492           mkComment (mkFastString "outOfLine here") <*> 
     493           do_gc <*> 
     494           mkBranch loop_id 
     495                      -- Test for stack pointer exhaustion, then 
     496                      -- bump heap pointer, and test for heap exhaustion 
     497                      -- Note that we don't move the heap pointer unless the 
     498                      -- stack check succeeds.  Otherwise we might end up 
     499                      -- with slop at the end of the current block, which can 
     500                      -- confuse the LDV profiler. 
     501   where 
    495502    alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes 
    496503    bump_hp   = cmmOffsetExprB (CmmReg hpReg) alloc_lit 
    497504 
  • compiler/codeGen/StgCmmMonad.hs

    diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
    index ccf0777..a096fd2 100644
    a b module StgCmmMonad ( 
    5555        -- more localised access to monad state  
    5656        CgIdInfo(..), CgLoc(..), 
    5757        getBinds, setBinds, getStaticBinds, 
     58    getLastGC, setLastGC, 
    5859 
    5960        -- out of general friendliness, we also export ... 
    6061        CgInfoDownwards(..), CgState(..)        -- non-abstract 
    data CgState 
    267268 
    268269     cgs_hp_usg  :: HeapUsage, 
    269270 
     271     cgs_last_gc :: Maybe (Bool -> WordOff -> FCode ()), 
     272 
    270273     cgs_uniqs :: UniqSupply } 
    271274 
    272275data HeapUsage = 
    initCgState uniqs 
    286289  = MkCgState { cgs_stmts      = mkNop, cgs_tops = nilOL, 
    287290                cgs_binds      = emptyVarEnv,  
    288291                cgs_hp_usg     = initHpUsage, 
     292        cgs_last_gc    = Nothing, 
    289293                cgs_uniqs      = uniqs } 
    290294 
    291295stateIncUsage :: CgState -> CgState -> CgState 
    getStaticBinds = do 
    369373        info  <- getInfoDown 
    370374        return (cgd_statics info) 
    371375 
     376setLastGC :: Maybe (Bool -> WordOff -> FCode ()) -> FCode () 
     377setLastGC mb_gc = do 
     378        state <- getState 
     379        setState $ state {cgs_last_gc = mb_gc} 
     380 
     381getLastGC :: FCode (Maybe (Bool -> WordOff -> FCode ())) 
     382getLastGC = do 
     383        state <- getState 
     384        return (cgs_last_gc state) 
     385 
    372386withState :: FCode a -> CgState -> FCode (a,CgState) 
    373387withState (FCode fcode) newstate = FCode $ \info_down state ->  
    374388        let (retval, state2) = fcode info_down newstate in ((retval,state2), state) 
    forkAlts branch_fcodes 
    559573                  (us1,us2) = splitUniqSupply us 
    560574                  branch_state = (initCgState us1) { 
    561575                                        cgs_binds   = cgs_binds state, 
    562                                         cgs_hp_usg  = cgs_hp_usg state } 
     576                                        cgs_hp_usg  = cgs_hp_usg state, 
     577                                        cgs_last_gc = cgs_last_gc state } 
    563578 
    564579              (_us, results) = mapAccumL compile us branch_fcodes 
    565580              (branch_results, branch_out_states) = unzip results 
  • compiler/codeGen/StgCmmPrim.hs

    diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
    index c95b1f0..6fc2e15 100644
    a b import FastString 
    4646import Outputable 
    4747import StaticFlags 
    4848 
     49import Control.Monad (unless) 
     50 
    4951------------------------------------------------------------------------ 
    5052--      Primitive operations and foreign calls 
    5153------------------------------------------------------------------------ 
    cgPrimOp :: [LocalReg] -- where to put the results 
    137139           -> FCode () 
    138140 
    139141cgPrimOp results op args 
    140   = do arg_exprs <- getNonVoidArgAmodes args 
     142  = do unless (primOpIsCheap op) $ setLastGC Nothing -- Prevent duplicating expensive/side-effecting primops. TODO: ok to duplicate failing ones! 
     143       arg_exprs <- getNonVoidArgAmodes args 
    141144       emitPrimOp results op arg_exprs 
    142145 
    143146