Ticket #3867: 0001-Support-large-SLIDE-instructions.patch

File 0001-Support-large-SLIDE-instructions.patch, 17.4 KB (added by pcapriotti, 3 years ago)
  • compiler/ghci/ByteCodeGen.lhs

    From 1c3056dda8d2570b6aa4f215239a2b2c2fe77425 Mon Sep 17 00:00:00 2001
    From: Paolo Capriotti <[email protected]>
    Date: Thu, 5 Apr 2012 09:52:18 +0100
    Subject: [PATCH 1/4] Support large SLIDE instructions.
    
    The bytecode generator used to keep track of the stack depth with a
    16-bit counter, which could overflow for very large BCOs, resulting in
    incorrect bytecode.
    
    This commit switches to a word-sized counter, and eagerly panics
    whenever an operand is too big, instead of truncating the result.
    
    This allows us to work around the 16-bit limitation in the case of SLIDE
    instructions, since we can simply factor it into multiple SLIDEs with
    smaller arguments.
    ---
     compiler/ghci/ByteCodeGen.lhs |  110 ++++++++++++++++++++++++----------------
     1 files changed, 66 insertions(+), 44 deletions(-)
    
    diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs
    index 046d6ec..c8b1b30 100644
    a b coreExprToBCOs dflags this_mod expr 
    131131
    132132type BCInstrList = OrdList BCInstr
    133133
    134 type Sequel = Word16 -- back off to this depth before ENTER
     134type Sequel = Word -- back off to this depth before ENTER
    135135
    136136-- Maps Ids to the offset from the stack _base_ so we don't have
    137137-- to mess with it after each push/pop.
    138 type BCEnv = Map Id Word16 -- To find vars on the stack
     138type BCEnv = Map Id Word -- To find vars on the stack
    139139
    140140{-
    141141ppBCEnv :: BCEnv -> SDoc
    schemeR_wrk fvs nm original_body (args, body) 
    298298                 arity bitmap_size bitmap False{-not alts-})
    299299
    300300-- introduce break instructions for ticked expressions
    301 schemeER_wrk :: Word16 -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
     301schemeER_wrk :: Word -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
    302302schemeER_wrk d p rhs
    303303  | AnnTick (Breakpoint tick_no fvs) (_annot, newRhs) <- rhs
    304   = do  code <- schemeE d 0 p newRhs
     304  = do  code <- schemeE (fromIntegral d) 0 p newRhs
    305305        arr <- getBreakArray
    306306        this_mod <- getCurrentModule
    307307        let idOffSets = getVarOffSets d p fvs
    schemeER_wrk d p rhs 
    315315                         BA arr# ->
    316316                             BRK_FUN arr# (fromIntegral tick_no) breakInfo
    317317        return $ breakInstr `consOL` code
    318    | otherwise = schemeE d 0 p rhs
     318   | otherwise = schemeE (fromIntegral d) 0 p rhs
    319319
    320 getVarOffSets :: Word16 -> BCEnv -> [Id] -> [(Id, Word16)]
     320getVarOffSets :: Word -> BCEnv -> [Id] -> [(Id, Word16)]
    321321getVarOffSets d p = catMaybes . map (getOffSet d p)
    322322
    323 getOffSet :: Word16 -> BCEnv -> Id -> Maybe (Id, Word16)
     323getOffSet :: Word -> BCEnv -> Id -> Maybe (Id, Word16)
    324324getOffSet d env id
    325325   = case lookupBCEnv_maybe id env of
    326326        Nothing     -> Nothing
    327         Just offset -> Just (id, d - offset)
     327        Just offset -> Just (id, trunc16 $ d - offset)
     328
     329trunc16 :: Word -> Word16
     330trunc16 w
     331    | w > fromIntegral (maxBound :: Word16)
     332    = panic "stack depth overflow"
     333    | otherwise
     334    = fromIntegral w
    328335
    329336fvsToEnv :: BCEnv -> VarSet -> [Id]
    330337-- Takes the free variables of a right-hand side, and
    fvsToEnv p fvs = [v | v <- varSetElems fvs, 
    342349-- -----------------------------------------------------------------------------
    343350-- schemeE
    344351
    345 returnUnboxedAtom :: Word16 -> Sequel -> BCEnv
     352returnUnboxedAtom :: Word -> Sequel -> BCEnv
    346353                 -> AnnExpr' Id VarSet -> CgRep
    347354                 -> BcM BCInstrList
    348355-- Returning an unlifted value.
    returnUnboxedAtom d s p e e_rep 
    355362
    356363-- Compile code to apply the given expression to the remaining args
    357364-- on the stack, returning a HNF.
    358 schemeE :: Word16 -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
     365schemeE :: Word -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
    359366
    360367schemeE d s p e
    361368   | Just e' <- bcView e
    schemeE d s p (AnnLet binds (_,body)) 
    404411         -- after the closures have been allocated in the heap (but not
    405412         -- filled in), and pointers to them parked on the stack.
    406413         p'    = Map.insertList (zipE xs (mkStackOffsets d (genericReplicate n_binds 1))) p
    407          d'    = d + n_binds
     414         d'    = d + fromIntegral n_binds
    408415         zipE  = zipEqual "schemeE"
    409416
    410417         -- ToDo: don't build thunks for things with no free variables
    schemeE d s p (AnnLet binds (_,body)) 
    415422                     | otherwise  = MKPAP
    416423         build_thunk dd (fv:fvs) size bco off arity = do
    417424              (push_code, pushed_szw) <- pushAtom dd p' (AnnVar fv)
    418               more_push_code <- build_thunk (dd+pushed_szw) fvs size bco off arity
     425              more_push_code <- build_thunk (dd + fromIntegral pushed_szw) fvs size bco off arity
    419426              return (push_code `appOL` more_push_code)
    420427
    421428         alloc_code = toOL (zipWith mkAlloc sizes arities)
    schemeE _ _ _ expr 
    542549-- 4.  Otherwise, it must be a function call.  Push the args
    543550--     right to left, SLIDE and ENTER.
    544551
    545 schemeT :: Word16       -- Stack depth
     552schemeT :: Word         -- Stack depth
    546553        -> Sequel       -- Sequel depth
    547554        -> BCEnv        -- stack env
    548555        -> AnnExpr' Id VarSet
    schemeT d s p app 
    561568   = do (push, arg_words) <- pushAtom d p arg
    562569        tagToId_sequence <- implement_tagToId constr_names
    563570        return (push `appOL`  tagToId_sequence
    564                        `appOL`  mkSLIDE 1 (d+arg_words-s)
     571                       `appOL`  mkSLIDE 1 (d - s + fromIntegral arg_words)
    565572                       `snocOL` ENTER)
    566573
    567574   -- Case 1
    schemeT d s p app 
    625632-- Generate code to build a constructor application,
    626633-- leaving it on top of the stack
    627634
    628 mkConAppCode :: Word16 -> Sequel -> BCEnv
     635mkConAppCode :: Word -> Sequel -> BCEnv
    629636             -> DataCon                 -- The data constructor
    630637             -> [AnnExpr' Id VarSet]    -- Args, in *reverse* order
    631638             -> BcM BCInstrList
    mkConAppCode orig_d _ p con args_r_to_l 
    646653
    647654      do_pushery d (arg:args)
    648655         = do (push, arg_words) <- pushAtom d p arg
    649               more_push_code <- do_pushery (d+arg_words) args
     656              more_push_code <- do_pushery (d + fromIntegral arg_words) args
    650657              return (push `appOL` more_push_code)
    651658      do_pushery d []
    652659         = return (unitOL (PACK con n_arg_words))
    653660         where
    654            n_arg_words = d - orig_d
     661           n_arg_words = trunc16 $ d - orig_d
    655662
    656663
    657664-- -----------------------------------------------------------------------------
    mkConAppCode orig_d _ p con args_r_to_l 
    662669-- returned, even if it is a pointed type.  We always just return.
    663670
    664671unboxedTupleReturn
    665         :: Word16 -> Sequel -> BCEnv
     672        :: Word -> Sequel -> BCEnv
    666673        -> AnnExpr' Id VarSet -> BcM BCInstrList
    667674unboxedTupleReturn d s p arg = do
    668675  (push, sz) <- pushAtom d p arg
    669676  return (push                      `appOL`
    670           mkSLIDE sz (d-s)          `snocOL`
     677          mkSLIDE sz (d - s)        `snocOL`
    671678          RETURN_UBX (atomRep arg))
    672679
    673680-- -----------------------------------------------------------------------------
    674681-- Generate code for a tail-call
    675682
    676683doTailCall
    677         :: Word16 -> Sequel -> BCEnv
     684        :: Word -> Sequel -> BCEnv
    678685        -> Id -> [AnnExpr' Id VarSet]
    679686        -> BcM BCInstrList
    680687doTailCall init_d s p fn args
    doTailCall init_d s p fn args 
    685692        (push_fn, sz) <- pushAtom d p (AnnVar fn)
    686693        ASSERT( sz == 1 ) return ()
    687694        return (push_fn `appOL` (
    688                   mkSLIDE ((d-init_d) + 1) (init_d - s) `appOL`
     695                  mkSLIDE (trunc16 $ d - init_d + 1) (init_d - s) `appOL`
    689696                  unitOL ENTER))
    690697  do_pushes d args reps = do
    691698      let (push_apply, n, rest_of_reps) = findPushSeq reps
    doTailCall init_d s p fn args 
    698705  push_seq d [] = return (d, nilOL)
    699706  push_seq d (arg:args) = do
    700707    (push_code, sz) <- pushAtom d p arg
    701     (final_d, more_push_code) <- push_seq (d+sz) args
     708    (final_d, more_push_code) <- push_seq (d + fromIntegral sz) args
    702709    return (final_d, push_code `appOL` more_push_code)
    703710
    704711-- v. similar to CgStackery.findMatch, ToDo: merge
    findPushSeq _ 
    731738-- -----------------------------------------------------------------------------
    732739-- Case expressions
    733740
    734 doCase  :: Word16 -> Sequel -> BCEnv
     741doCase  :: Word -> Sequel -> BCEnv
    735742        -> AnnExpr Id VarSet -> Id -> [AnnAlt Id VarSet]
    736743        -> Bool  -- True <=> is an unboxed tuple case, don't enter the result
    737744        -> BcM BCInstrList
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    741748        -- underneath it is the pointer to the alt_code BCO.
    742749        -- When an alt is entered, it assumes the returned value is
    743750        -- on top of the itbl.
     751        ret_frame_sizeW :: Word
    744752        ret_frame_sizeW = 2
    745753
    746754        -- An unlifted value gets an extra info table pushed on top
    747755        -- when it is returned.
     756        unlifted_itbl_sizeW :: Word
    748757        unlifted_itbl_sizeW | isAlgCase = 0
    749758                            | otherwise = 1
    750759
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    758767
    759768        -- Env in which to compile the alts, not including
    760769        -- any vars bound by the alts themselves
    761         p_alts = Map.insert bndr (d_bndr - 1) p
     770        p_alts = Map.insert bndr (fromIntegral d_bndr - 1) p
    762771
    763772        bndr_ty = idType bndr
    764773        isAlgCase = not (isUnLiftedType bndr_ty) && not is_unboxed_tuple
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    788797                        p_alts
    789798             in do
    790799             MASSERT(isAlgCase)
    791              rhs_code <- schemeE (d_alts+size) s p' rhs
    792              return (my_discr alt, unitOL (UNPACK size) `appOL` rhs_code)
     800             rhs_code <- schemeE (d_alts + size) s p' rhs
     801             return (my_discr alt, unitOL (UNPACK (trunc16 size)) `appOL` rhs_code)
    793802           where
    794803             real_bndrs = filterOut isTyVar bndrs
    795804
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    828837        -- really want a bitmap up to depth (d-s).  This affects compilation of
    829838        -- case-of-case expressions, which is the only time we can be compiling a
    830839        -- case expression with s /= 0.
    831         bitmap_size = d-s
     840        bitmap_size = trunc16 $ d-s
    832841        bitmap_size' :: Int
    833842        bitmap_size' = fromIntegral bitmap_size
    834843        bitmap = intsToReverseBitmap bitmap_size'{-size-}
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    839848          spread (id, offset)
    840849                | isFollowableArg (idCgRep id) = [ rel_offset ]
    841850                | otherwise = []
    842                 where rel_offset = d - offset - 1
     851                where rel_offset = trunc16 $ d - fromIntegral offset - 1
    843852
    844853     in do
    845854     alt_stuff <- mapM codeAlt alts
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    852861     -- in
    853862--     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
    854863--            "\n      bitmap = " ++ show bitmap) $ do
    855      scrut_code <- schemeE (d + ret_frame_sizeW) (d + ret_frame_sizeW) p scrut
     864     scrut_code <- schemeE (d + ret_frame_sizeW)
     865                           (d + ret_frame_sizeW)
     866                           p scrut
    856867     alt_bco' <- emitBc alt_bco
    857868     let push_alts
    858869            | isAlgCase = PUSH_ALTS alt_bco'
    doCase d s p (_,scrut) bndr alts is_unboxed_tuple 
    869880-- (machine) code for the ccall, and create bytecodes to call that and
    870881-- then return in the right way.
    871882
    872 generateCCall :: Word16 -> Sequel       -- stack and sequel depths
     883generateCCall :: Word -> Sequel         -- stack and sequel depths
    873884              -> BCEnv
    874885              -> CCallSpec              -- where to call
    875886              -> Id                     -- of target, for type info
    generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l 
    896907                    -- contains.
    897908                    Just t
    898909                     | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
    899                        -> do rest <- pargs (d + addr_sizeW) az
     910                       -> do rest <- pargs (d + fromIntegral addr_sizeW) az
    900911                             code <- parg_ArrayishRep (fromIntegral arrPtrsHdrSize) d p a
    901912                             return ((code,AddrRep):rest)
    902913
    903914                     | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
    904                        -> do rest <- pargs (d + addr_sizeW) az
     915                       -> do rest <- pargs (d + fromIntegral addr_sizeW) az
    905916                             code <- parg_ArrayishRep (fromIntegral arrWordsHdrSize) d p a
    906917                             return ((code,AddrRep):rest)
    907918
    908919                    -- Default case: push taggedly, but otherwise intact.
    909920                    _
    910921                       -> do (code_a, sz_a) <- pushAtom d p a
    911                              rest <- pargs (d+sz_a) az
     922                             rest <- pargs (d + fromIntegral sz_a) az
    912923                             return ((code_a, atomPrimRep a) : rest)
    913924
    914925         -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
    915926         -- the stack but then advance it over the headers, so as to
    916927         -- point to the payload.
    917          parg_ArrayishRep :: Word16 -> Word16 -> BCEnv -> AnnExpr' Id VarSet
     928         parg_ArrayishRep :: Word16 -> Word -> BCEnv -> AnnExpr' Id VarSet
    918929                          -> BcM BCInstrList
    919930         parg_ArrayishRep hdrSize d p a
    920931            = do (push_fo, _) <- pushAtom d p a
    generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l 
    10161027         (push_Addr, d_after_Addr)
    10171028            | is_static
    10181029            = (toOL [PUSH_UBX (Right static_target_addr) addr_sizeW],
    1019                d_after_args + addr_sizeW)
     1030               d_after_args + fromIntegral addr_sizeW)
    10201031            | otherwise -- is already on the stack
    10211032            = (nilOL, d_after_args)
    10221033
    10231034         -- Push the return placeholder.  For a call returning nothing,
    10241035         -- this is a VoidArg (tag).
    10251036         r_sizeW   = fromIntegral (primRepSizeW r_rep)
    1026          d_after_r = d_after_Addr + r_sizeW
     1037         d_after_r = d_after_Addr + fromIntegral r_sizeW
    10271038         r_lit     = mkDummyLiteral r_rep
    10281039         push_r    = (if   returns_void
    10291040                      then nilOL
    generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l 
    10351046         -- instruction needs to describe the chunk of stack containing
    10361047         -- the ccall args to the GC, so it needs to know how large it
    10371048         -- is.  See comment in Interpreter.c with the CCALL instruction.
    1038          stk_offset   = d_after_r - s
     1049         stk_offset   = trunc16 $ d_after_r - s
    10391050
    10401051     -- in
    10411052     -- the only difference in libffi mode is that we prepare a cif
    generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l 
    10501061         do_call      = unitOL (CCALL stk_offset (castFunPtrToPtr addr_of_marshaller)
    10511062                                 (fromIntegral (fromEnum (playInterruptible safety))))
    10521063         -- slide and return
    1053          wrapup       = mkSLIDE r_sizeW (d_after_r - r_sizeW - s)
     1064         wrapup       = mkSLIDE r_sizeW (d_after_r - fromIntegral r_sizeW - s)
    10541065                        `snocOL` RETURN_UBX (primRepToCgRep r_rep)
    10551066     --in
    10561067         --trace (show (arg1_offW, args_offW  ,  (map cgRepSizeW a_reps) )) $
    implement_tagToId names 
    11501161-- to 5 and not to 4.  Stack locations are numbered from zero, so a
    11511162-- depth 6 stack has valid words 0 .. 5.
    11521163
    1153 pushAtom :: Word16 -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Word16)
     1164pushAtom :: Word -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Word16)
    11541165
    11551166pushAtom d p e
    11561167   | Just e' <- bcView e
    pushAtom d p (AnnVar v) 
    11701181   = return (unitOL (PUSH_PRIMOP primop), 1)
    11711182
    11721183   | Just d_v <- lookupBCEnv_maybe v p  -- v is a local variable
    1173    = let l = d - d_v + sz - 2
     1184   = let l = trunc16 $ d - d_v + fromIntegral sz - 2
    11741185     in return (toOL (genericReplicate sz (PUSH_L l)), sz)
    11751186         -- d - d_v                 the number of words between the TOS
    11761187         --                         and the 1st slot of the object
    instance Outputable Discr where 
    14011412   ppr NoDiscr    = text "DEF"
    14021413
    14031414
    1404 lookupBCEnv_maybe :: Id -> BCEnv -> Maybe Word16
     1415lookupBCEnv_maybe :: Id -> BCEnv -> Maybe Word
    14051416lookupBCEnv_maybe = Map.lookup
    14061417
    14071418idSizeW :: Id -> Int
    unboxedTupleException 
    14171428            "  Workaround: use -fobject-code, or compile this module to .o separately."))
    14181429
    14191430
    1420 mkSLIDE :: Word16 -> Word16 -> OrdList BCInstr
    1421 mkSLIDE n d = if d == 0 then nilOL else unitOL (SLIDE n d)
     1431mkSLIDE :: Word16 -> Word -> OrdList BCInstr
     1432mkSLIDE n d
     1433    -- if the amount to slide doesn't fit in a word,
     1434    -- generate multiple slide instructions
     1435    | d > fromIntegral limit
     1436    = SLIDE n limit `consOL` mkSLIDE n (d - fromIntegral limit)
     1437    | d == 0
     1438    = nilOL
     1439    | otherwise
     1440    = if d == 0 then nilOL else unitOL (SLIDE n $ fromIntegral d)
     1441    where
     1442        limit :: Word16
     1443        limit = maxBound
    14221444
    14231445splitApp :: AnnExpr' Var ann -> (AnnExpr' Var ann, [AnnExpr' Var ann])
    14241446        -- The arguments are returned in *right-to-left* order
    isPtrAtom e = atomRep e == PtrArg 
    14651487-- Let szsw be the sizes in words of some items pushed onto the stack,
    14661488-- which has initial depth d'.  Return the values which the stack environment
    14671489-- should map these items to.
    1468 mkStackOffsets :: Word16 -> [Word16] -> [Word16]
     1490mkStackOffsets :: Word -> [Word] -> [Word]
    14691491mkStackOffsets original_depth szsw
    14701492   = map (subtract 1) (tail (scanl (+) original_depth szsw))
    14711493