311 | | Below is a proof-of-concept filter function that demonstrates performance gains possible with the new primops: |

312 | | |

313 | | {{{ |

314 | | filterN :: Vector Int -> Vector Int |

| 311 | Below is a benchmark for the proof-of-concept filter function that demonstrates the performance gains possible with the new primops: |

| 312 | |

| 313 | {{{ |

| 314 | {-# LANGUAGE BangPatterns, MagicHash #-} |

| 315 | module Main ( |

| 316 | main |

| 317 | ) where |

| 318 | |

| 319 | import Control.Monad.ST (runST) |

| 320 | import Criterion.Config (Config, cfgPerformGC, |

| 321 | defaultConfig, ljust) |

| 322 | import Criterion.Main |

| 323 | import Data.Vector.Unboxed.Mutable (unsafeNew, unsafeSlice, unsafeWrite) |

| 324 | import Data.Vector.Unboxed as U (Vector, filter, foldM', |

| 325 | fromList, length, unsafeFreeze) |

| 326 | import GHC.Exts (Int (I#), (.>=#)) |

| 327 | import System.Random (RandomGen, mkStdGen, randoms) |

| 328 | import Prelude hiding (filter, length) |

| 329 | |

| 330 | |

| 331 | filterN :: U.Vector Int -> U.Vector Int |

325 | | }}} |

326 | | |

327 | | Benchmarking with criterion shows that this function is 60% faster than the filter function based on stream fusion (tested for unboxed vectors containing 10 thousand and 10 million elements). |

| 342 | |

| 343 | |

-- | Benchmark entry point.
--
-- Builds the benchmark list from a fixed-seed generator and hands it to
-- criterion's 'defaultMainWith' together with 'benchConfig'.
main :: IO ()
main = defaultMainWith benchConfig (return ()) (benchmarks gen)
  where
    -- A constant seed means the same generator is used on every run.
    gen = mkStdGen 1232134332

| 347 | |

| 348 | |

-- | Build the criterion benchmarks comparing 'filterN' with the
-- 'filter' function imported from "Data.Vector.Unboxed".
--
-- Both benchmarks run over the same unboxed vector of @10^7@ random 'Int's
-- drawn from the supplied generator.
benchmarks :: RandomGen g => g -> [Benchmark]
benchmarks gen =
    -- Force the input before the benchmarks are handed to criterion;
    -- otherwise the vector would be built lazily inside the first run of
    -- the first benchmark. For an unboxed vector, evaluating to WHNF is
    -- expected to materialise the whole array.
    inputVec `seq`
    [ bgroup "Filter"
        [ bench "New"    $ whnf filterN inputVec
        , bench "Vector" $ whnf (filter isPositive) inputVec
        ]
    ]
  where
    -- Number of elements in the benchmark input.
    dataSize :: Int
    dataSize = 10 ^ (7 :: Int)

    inputList :: [Int]
    inputList = take dataSize (randoms gen)

    inputVec :: Vector Int
    inputVec = fromList inputList

    -- Predicate used for the library 'filter' benchmark.
    isPositive :: Int -> Bool
    isPositive = (> 0)

| 362 | |

| 363 | |

-- | Criterion configuration used by 'main': criterion's 'defaultConfig'
-- with the 'cfgPerformGC' option switched on.
benchConfig :: Config
benchConfig = defaultConfig { cfgPerformGC = ljust True }

| 368 | |

| 369 | }}} |

| 370 | |

| 371 | Compile and run with: |

| 372 | |

| 373 | {{{ |

| 374 | ghc -O2 -fllvm -optlo-O3 Main.hs |

| 375 | ./Main -o report.html |

| 376 | }}} |

| 377 | |

| 378 | Benchmarking shows that the `filterN` function is 60% faster than the `filter` function based on stream fusion (tested for unboxed vectors containing 10 thousand and 10 million elements).