Ticket #2077: Trie2.hs

File Trie2.hs, 13.2 KB (added by tbh, 7 years ago)

Example of working file with different comment block

Line 
1-- ----------------------------------------------------------------------------
2
3{- |
4  Module     : Holumbus.Data.Trie
5  Copyright  : Copyright (C) 2007 Timo B. Huebel
6  License    : MIT
7
8  Maintainer : Timo B. Huebel ([email protected])
9  Stability  : experimental
10  Portability: portable
11  Version    : 0.5
12
13  An efficient implementation of maps from arbitrary byte key to arbitrary values.
14
15  Values can associated with an arbitrary byte key. Searching for keys is very fast, but
16  the trie probably consumes more memory than "Data.Map". The main differences are the special
17  'prefixFind' functions, which can be used to perform prefix queries.
18
19  Most other function names clash with "Prelude" names, therefore this module is usually
20  imported @qualified@, e.g.
21 
22  > import Holumbus.Data.Trie (Trie)
23  > import qualified Holumbus.Data.Trie as T
24
25  See also
26   
27-}
28
29-- ----------------------------------------------------------------------------
30
31{-# OPTIONS -fglasgow-exts #-}
32
33module Holumbus.Data.Trie where
34
35import Prelude hiding (succ, lookup, map, null)
36
37import Data.Maybe
38import Data.Char
39import Data.Binary
40import Data.Word
41
42import Control.Monad
43
44import Data.Foldable (Foldable)
45import qualified Data.Foldable as F
46
47import qualified Data.List as L
48import qualified Data.Map as M
49
50import Control.Parallel.Strategies
51
52-- | A map from arbitrary byte keys to values a.
53data Trie a
54  = End !Key !a ![Trie a]
55  | Seq !Key ![Trie a] 
56
57-- | The key type.
58type Key = [Word8]
59
60-- Just deriving Eq will not work, because equality on the lists of successors takes the order
61-- into account, whereas the order does not matter here.
62instance Eq a => Eq (Trie a) where 
63  (==) (End k1 v1 s1) (End k2 v2 s2) = k1 == k2 && v1 == v2 && s1 L.\\ s2 == []
64  (==) (Seq k1 s1) (Seq k2 s2)       = k1 == k2 && s1 L.\\ s2 == []
65  (==) (Seq _ _) (End _ _ _)         = False
66  (==) (End _ _ _) (Seq _ _)         = False
67  (/=) m1 m2                         = not (m1 == m2)
68
69-- Compare based on to-/fromList
70instance Ord a => Ord (Trie a) where
71  compare m1 m2 = compare (toList m1) (toList m2)
72
73-- Simple instance of Functor.
74instance Functor Trie where
75  fmap = map
76
77-- Simple instance of Data.Foldable
78instance Foldable Trie where
79  foldr = fold
80
81-- Stolen from Data.IntMap
82instance Show a => Show (Trie a) where
83  showsPrec d m   = showParen (d > 10) $
84    showString "fromList " . shows (toList m)
85
86-- Stolen from Data.IntMap
87instance Read a => Read (Trie a) where
88  readsPrec p = readParen (p > 10) $ \ r -> do
89    ("fromList",s) <- lex r
90    (xs,t) <- reads s
91    return (fromList xs,t)
92
93-- Providing strict evaluation for 'StrMap'.
94instance NFData a => NFData (Trie a) where
95  rnf (End k v t) = rnf k `seq` rnf v `seq` rnf t
96  rnf (Seq k t)   = rnf k `seq` rnf t
97
98-- Provide native binary serialization (not via to-/fromList).
99instance (Binary a) => Binary (Trie a) where
100  put (End k v t) = put (0 :: Word8) >> put k >> put v >> put t
101  put (Seq k t)   = put (1 :: Word8) >> put k >> put t
102
103  get = do tag <- getWord8
104           case tag of
105             0 -> liftM3 End get get get
106             1 -> liftM2 Seq get get
107             _ -> fail "Trie.get: error while decoding StrMap"                       
108
109-- | /O(1)/ Create an empty trie.
110empty :: Trie a
111empty = Seq [] []
112
113-- | /O(1)/ Is the map empty?
114null :: Trie a -> Bool
115null (Seq _ [])    = True
116null (Seq _ (_:_)) = False
117null (End _ _ _)   = error "Trie.null: root node should be Seq"
118
119-- | /O(1)/ Create a map with a single element.
120singleton :: Key -> a -> Trie a
121singleton k v = Seq [] [End k v []]
122
123-- | /O(1)/ Extract the key of a node
124key :: Trie a -> Key
125key (End k _ _) = k
126key (Seq k _)   = k
127
128-- | /O(1)/ Extract the value of a node (if there is one)
129value :: Monad m => Trie a -> m a
130value (End _ v _) = return v
131value (Seq _ _) = fail "Trie.value: no value at this node"
132
133-- | /O(1)/ Extract the value of a node or return a default value if no value exists.
134valueWithDefault :: a -> Trie a -> a
135valueWithDefault _ (End _ v _) = v
136valueWithDefault d (Seq _ _) = d
137
138-- | /O(1)/ Extract the successors of a node
139succ :: Trie a -> [Trie a]
140succ (End _ _ t) = t
141succ (Seq _ t)   = t
142
143-- | /O(1)/ Sets the key of a node.
144setKey :: Key -> Trie a -> Trie a
145setKey k (End _ v t) = End k v t
146setKey k (Seq _ t)   = Seq k t
147
148-- | /O(1)/ Sets the successors of a node.
149setSucc :: [Trie a] -> Trie a -> Trie a
150setSucc t (End k v _) = End k v t
151setSucc t (Seq k _)   = Seq k t
152
153-- | /O(min(n,L))/ Find the value at a key. Calls error when the element can not be found.
154(!) :: Trie a -> Key -> a
155(!) m k = if isNothing r then error ("Trie.!: element not in the map")
156          else fromJust r
157          where r = lookup k m
158
159-- | /O(min(n,L))/ Is the key a member of the map?
160member :: Key -> Trie a -> Bool
161member k m = maybe False (\_ -> True) (lookup k m)
162
163-- | /O(min(n,L))/ Delete an element from the map. If no element exists for the key, the map
164-- remains unchanged.
165delete :: Key -> Trie a -> Trie a
166delete = (fromMaybe empty .) . delete'
167
168-- | The internal delete function.
169delete' :: Key -> Trie a -> Maybe (Trie a)
170delete' d n | L.null dr && L.null kr       = deleteNode n
171            | not (L.null dr) && L.null kr = Just (mergeNode n (mapMaybe (delete' dr) (succ n)))
172            | otherwise                    = Just n
173            where (_, dr, kr) = split d (key n)
174
175-- | Merge a node with its successor if only one successor is left.
176mergeNode :: Trie a -> [Trie a] -> Trie a
177mergeNode (End k v _) t = End k v t
178mergeNode (Seq k _) [t] = if not (L.null k) then setKey (k ++ (key t)) t else Seq k [t]
179mergeNode (Seq k _) t   = Seq k t
180
181-- | Delete a node by either merging it with its successors or removing it completely.
182deleteNode :: Trie a -> Maybe (Trie a)
183deleteNode (End _ _ [])  = Nothing
184deleteNode (End k _ [t]) = Just (setKey (k ++ key t) t)
185deleteNode (End k _ t)   = Just (Seq k t)
186deleteNode n             = Just n
187
188-- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map,
189-- the value of @f key new_value old_value@ will be inserted.
190insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> Trie a -> Trie a
191insertWithKey f nk nv n = insert' f nk nv nk n
192
193-- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map,
194-- the value of @f new_value old_value@ will be inserted.
195insertWith :: (a -> a -> a) -> Key -> a -> Trie a -> Trie a
196insertWith f nk nv n = insert' (\_ new old -> f new old) nk nv nk n
197
198-- | /O(min(n,L))/ Insert a new key and value into the map. If the key is already present in
199-- the map, the associated value will be replaced with the new value.
200insert :: Key -> a -> Trie a -> Trie a
201insert nk nv n = insertWith const nk nv n
202
203-- | The internal insert function which does the real work. The original new key has to
204-- be put through because otherwise it will be shortened on every recursive call.
205insert' :: (Key -> a -> a -> a) -> Key -> a -> Key -> Trie a -> Trie a
206insert' f nk nv ok n | L.null nk                    = error "Empty key!"
207                     -- Key already exists, the current value will be replaced with the new value.
208                     | L.null cr && L.null nr       = End s (maybe nv (f ok nv) (value n)) (succ n) 
209                     -- Insert into list of successors.
210                     | L.null cr && not (L.null nr) = setSucc (insertSub f nr nv ok (succ n)) n
211                     -- New intermediate End node with the new value and the current node with the
212                     -- remainder of the key as successor.
213                     | L.null nr && not (L.null cr) = End s nv [setKey cr n]
214                     -- New intermediate Seq node which shares the prefix of the new key and the
215                     -- key of the current node.
216                     | otherwise = Seq s [setKey cr n, (End nr nv [])]
217                     where (s, nr, cr) = split nk (key n)
218
219-- | Internal support function for insert which searches the correct successor to insert into
220-- within a list of nodes (the successors of the current node, see call in insert' above).
221insertSub :: (Key -> a -> a -> a) -> Key -> a -> Key -> [Trie a] -> [Trie a]
222insertSub f k v o t = insertSub' f k v t []
223  where
224    insertSub' :: (Key -> a -> a -> a) -> Key -> a -> [Trie a] -> [Trie a] -> [Trie a]
225    insertSub' _ nk nv [] r     = (End nk nv []):r
226    insertSub' cf nk nv (x:xs) r = if head (key x) == head nk then (insert' cf nk nv o x):r ++ xs else 
227                                  insertSub' cf nk nv xs (x:r)
228
229-- | Analyses two strings and splits them into three parts: A common prefix and both reminders
230splitBy :: (Key -> Key) -> Key -> Key -> (Key, Key, Key)
231splitBy f a b = splitBy' (f a) (f b) ([], [], [])
232  where
233    splitBy' :: Key -> Key -> (Key, Key, Key) -> (Key, Key, Key)
234    splitBy' n [] (p, nr, hr) = (p, nr ++ n, hr)
235    splitBy' [] h (p, nr, hr) = (p, nr, hr ++ h)
236    splitBy' (n:ns) (h:hs) (p, nr, hr) = if n == h then splitBy' ns hs (p ++ [n], nr, hr) 
237                                         else (p, n:ns, h:hs)
238
239-- | Simple split without any preprocessing.
240split :: Key -> Key -> (Key, Key, Key)
241split = splitBy id
242
243-- | /O(n)/ Returns all values.
244elems :: Trie a -> [a]
245elems t   = L.map snd (toList t)
246
247-- | /O(n)/ Creates a trie from a list of key\/value pairs.
248fromList :: [(Key, a)] -> Trie a
249fromList xs = L.foldl' (\p (k, v) -> insert k v p) empty xs
250
251-- | /O(n)/ Returns all elements as list of key value pairs,
252toList :: Trie a -> [(Key, a)]
253toList = foldWithKey (\k v r -> (k, v):r) []
254
255-- | /O(n)/ The number of elements.
256size :: Trie a -> Int
257size = fold (\_ r -> r + 1) 0
258
259-- | /O(max(L,R))/ Find all values where the string is a prefix of the key.
260prefixFind :: Key -> Trie a -> [a] 
261prefixFind q n = L.map snd (prefixFindInternal split q n)
262
263-- | /O(max(L,R))/ Find all values where the string is a prefix of the key and include the keys
264-- in the result.
265prefixFindWithKey :: Key -> Trie a -> [(Key, a)]
266prefixFindWithKey = prefixFindInternal split
267
268-- | /O(max(L,R))/ Same as 'prefixFind', but preprocesses the search key and every
269-- key in the map with @f@ before comparison.
270prefixFindBy :: (Key -> Key) -> Key -> Trie a -> [a]
271prefixFindBy f q n = L.map snd (prefixFindInternal (splitBy f) q n)
272
273-- | /O(max(L,R))/ Same as 'prefixFindWithKey', but preprocesses the search key and every
274-- key in the map with @f@ before comparison.
275prefixFindWithKeyBy :: (Key -> Key) -> Key -> Trie a -> [(Key, a)]
276prefixFindWithKeyBy f = prefixFindInternal (splitBy f)
277
278-- | Internal prefix find function which is used to implement every other prefix find function.
279prefixFindInternal :: (Key -> Key -> (Key, Key, Key)) -> Key -> Trie a -> [(Key, a)]
280prefixFindInternal f = prefixFindInternal' f []
281  where
282    prefixFindInternal' sf a p n | L.null pr = L.map (\(k, v) -> (a ++ k, v)) (toList n)
283                                 | L.null kr = concat (L.map (prefixFindInternal' sf (a ++ (key n)) pr) (succ n))
284                                 | otherwise = []
285                                 where (_, pr, kr) = sf p (key n)
286
287-- | /O(min(n,L))/ Find the value associated with a key. The function will @return@ the result in
288-- the monad or @fail@ in it if the key isn't in the map.
289lookup :: Monad m => Key -> Trie a -> m a
290lookup q n = case lookup' q n of
291             Just v -> return v
292             Nothing -> fail "Trie.lookup: Key not found"
293
294-- | Internal lookup function which is generalised for arbitrary monads above.
295lookup' :: Key -> Trie a -> Maybe a
296lookup' q n | L.null pr = if L.null kr then value n else Nothing
297            | L.null kr = let xs = (filter isJust (L.map (lookup pr) (succ n))) in
298                          if L.null xs then Nothing else head xs
299            | otherwise = Nothing
300            where (_, pr, kr) = split q (key n)
301
302-- | /O(max(L,R))/ Same as 'lookup', but preprocesses the search key and every
303-- key in the map with @f@ before comparison.
304lookupBy :: (Key -> Key) -> Key -> Trie a -> [a]
305lookupBy f q n | L.null pr = if L.null kr then maybeToList (value n) else []
306               | L.null kr = concat (L.map (lookupBy f pr) (succ n))
307               | otherwise = []
308               where (_, pr, kr) = splitBy f q (key n)
309
310-- | /O(min(n,L))/ Find the value associated with a key or return a default value if nothing
311-- was found.
312findWithDefault :: a -> Key -> Trie a -> a
313findWithDefault d q n = maybe d id (lookup q n)
314
315-- | /O(n)/ Fold over all key\/value pairs in the map.
316foldWithKey :: (Key -> a -> b -> b) -> b -> Trie a -> b
317foldWithKey f n m = fold' [] m n
318  where
319  fold' ck (End k v t) r = let nk = ck ++ k in foldr (fold' nk) (f nk v r) t
320  fold' ck (Seq k t) r   = let nk = ck ++ k in foldr (fold' nk) r t
321
322-- | /O(n)/ Fold over all values in the map.
323fold :: (a -> b -> b) -> b -> Trie a -> b
324fold f = foldWithKey (\_ v r -> f v r)
325
326-- | /O(n)/ Map over all key\/value pairs in the map.
327mapWithKey :: (Key -> a -> b) -> Trie a -> Trie b
328mapWithKey f m = map' [] m
329  where
330  map' ck (End k v t) = let nk = ck ++ k in End k (f nk v) (L.map (map' nk) t)
331  map' ck (Seq k t)   = let nk = ck ++ k in Seq k (L.map (map' nk) t)
332
333-- | /O(n)/ Map over all values in the map.
334map :: (a -> b) -> Trie a -> Trie b
335map f = mapWithKey (\_ v -> f v)
336
337-- | /O(n)/ Convert into an ordinary map.
338toMap :: Trie a -> M.Map Key a
339toMap = foldWithKey M.insert M.empty
340
341-- | /O(n)/ Convert an ordinary map into a StrMap.
342fromMap :: M.Map Key a -> Trie a
343fromMap = M.foldWithKey insert empty
344
345-- | /O(n)/ Calculate some statistics about the Trie for debugging purposes.