Ticket #2077: Trie1.hs

File Trie1.hs, 14.0 KB (added by tbh, 8 years ago)

Example of GHC ignoring a pragma

Line 
1-- ----------------------------------------------------------------------------
2
3{- |
4  Module     : Holumbus.Data.Trie
5  Copyright  : Copyright (C) 2007 Timo B. Huebel
6  License    : MIT
7
8  Maintainer : Timo B. Huebel ([email protected])
9  Stability  : experimental
10  Portability: portable
11  Version    : 0.5
12
13  An efficient implementation of maps from arbitrary byte key to arbitrary values.
14
15  Values can associated with an arbitrary byte key. Searching for keys is very fast, but
16  the trie probably consumes more memory than "Data.Map". The main differences are the special
17  'prefixFind' functions, which can be used to perform prefix queries.
18
19  Most other function names clash with "Prelude" names, therefore this module is usually
20  imported @qualified@, e.g.
21 
22  > import Holumbus.Data.Trie (Trie)
23  > import qualified Holumbus.Data.Trie as T
24
25  See also
26 
27    * Donald R. Morrison,
28      \"/PATRICIA - Practical Algorithm To Retrieve Information Coded In Alphanumeric/\",
29      Journal of the ACM, 15 (4), 1968, pages 514-534.
30 
31  Many functions have a worst-case complexity of /O(min(n,L))/. This means that the operation
32  can become linear with the number of elements with a maximum of /L/, the length of the
33  key (the number of bytes in the list). The functions for searching a prefix have a worst-case
34  complexity of /O(max(L,R))/. This means that the operation can become linear with
35  /R/, the number of elements found for the prefix, with a minimum of /L/.
36 
37  The module is completely exported for ultimate flexibility. Derived modules should only
38  export a restricted interface (as shown in "Holumbus.Data.StrMap").
39 
40-}
41
42-- ----------------------------------------------------------------------------
43
44{-# OPTIONS -fglasgow-exts #-}
45
46module Holumbus.Data.Trie where
47
48import Prelude hiding (succ, lookup, map, null)
49
50import Data.Maybe
51import Data.Char
52import Data.Binary
53import Data.Word
54
55import Control.Monad
56
57import Data.Foldable (Foldable)
58import qualified Data.Foldable as F
59
60import qualified Data.List as L
61import qualified Data.Map as M
62
63import Control.Parallel.Strategies
64
65-- | A map from arbitrary byte keys to values a.
66data Trie a
67  = End !Key !a ![Trie a]
68  | Seq !Key ![Trie a] 
69
70-- | The key type.
71type Key = [Word8]
72
73-- Just deriving Eq will not work, because equality on the lists of successors takes the order
74-- into account, whereas the order does not matter here.
75instance Eq a => Eq (Trie a) where 
76  (==) (End k1 v1 s1) (End k2 v2 s2) = k1 == k2 && v1 == v2 && s1 L.\\ s2 == []
77  (==) (Seq k1 s1) (Seq k2 s2)       = k1 == k2 && s1 L.\\ s2 == []
78  (==) (Seq _ _) (End _ _ _)         = False
79  (==) (End _ _ _) (Seq _ _)         = False
80  (/=) m1 m2                         = not (m1 == m2)
81
82-- Compare based on to-/fromList
83instance Ord a => Ord (Trie a) where
84  compare m1 m2 = compare (toList m1) (toList m2)
85
86-- Simple instance of Functor.
87instance Functor Trie where
88  fmap = map
89
90-- Simple instance of Data.Foldable
91instance Foldable Trie where
92  foldr = fold
93
94-- Stolen from Data.IntMap
95instance Show a => Show (Trie a) where
96  showsPrec d m   = showParen (d > 10) $
97    showString "fromList " . shows (toList m)
98
99-- Stolen from Data.IntMap
100instance Read a => Read (Trie a) where
101  readsPrec p = readParen (p > 10) $ \ r -> do
102    ("fromList",s) <- lex r
103    (xs,t) <- reads s
104    return (fromList xs,t)
105
106-- Providing strict evaluation for 'StrMap'.
107instance NFData a => NFData (Trie a) where
108  rnf (End k v t) = rnf k `seq` rnf v `seq` rnf t
109  rnf (Seq k t)   = rnf k `seq` rnf t
110
111-- Provide native binary serialization (not via to-/fromList).
112instance (Binary a) => Binary (Trie a) where
113  put (End k v t) = put (0 :: Word8) >> put k >> put v >> put t
114  put (Seq k t)   = put (1 :: Word8) >> put k >> put t
115
116  get = do tag <- getWord8
117           case tag of
118             0 -> liftM3 End get get get
119             1 -> liftM2 Seq get get
120             _ -> fail "Trie.get: error while decoding StrMap"                       
121
122-- | /O(1)/ Create an empty trie.
123empty :: Trie a
124empty = Seq [] []
125
126-- | /O(1)/ Is the map empty?
127null :: Trie a -> Bool
128null (Seq _ [])    = True
129null (Seq _ (_:_)) = False
130null (End _ _ _)   = error "Trie.null: root node should be Seq"
131
132-- | /O(1)/ Create a map with a single element.
133singleton :: Key -> a -> Trie a
134singleton k v = Seq [] [End k v []]
135
136-- | /O(1)/ Extract the key of a node
137key :: Trie a -> Key
138key (End k _ _) = k
139key (Seq k _)   = k
140
141-- | /O(1)/ Extract the value of a node (if there is one)
142value :: Monad m => Trie a -> m a
143value (End _ v _) = return v
144value (Seq _ _) = fail "Trie.value: no value at this node"
145
146-- | /O(1)/ Extract the value of a node or return a default value if no value exists.
147valueWithDefault :: a -> Trie a -> a
148valueWithDefault _ (End _ v _) = v
149valueWithDefault d (Seq _ _) = d
150
151-- | /O(1)/ Extract the successors of a node
152succ :: Trie a -> [Trie a]
153succ (End _ _ t) = t
154succ (Seq _ t)   = t
155
156-- | /O(1)/ Sets the key of a node.
157setKey :: Key -> Trie a -> Trie a
158setKey k (End _ v t) = End k v t
159setKey k (Seq _ t)   = Seq k t
160
161-- | /O(1)/ Sets the successors of a node.
162setSucc :: [Trie a] -> Trie a -> Trie a
163setSucc t (End k v _) = End k v t
164setSucc t (Seq k _)   = Seq k t
165
166-- | /O(min(n,L))/ Find the value at a key. Calls error when the element can not be found.
167(!) :: Trie a -> Key -> a
168(!) m k = if isNothing r then error ("Trie.!: element not in the map")
169          else fromJust r
170          where r = lookup k m
171
172-- | /O(min(n,L))/ Is the key a member of the map?
173member :: Key -> Trie a -> Bool
174member k m = maybe False (\_ -> True) (lookup k m)
175
176-- | /O(min(n,L))/ Delete an element from the map. If no element exists for the key, the map
177-- remains unchanged.
178delete :: Key -> Trie a -> Trie a
179delete = (fromMaybe empty .) . delete'
180
181-- | The internal delete function.
182delete' :: Key -> Trie a -> Maybe (Trie a)
183delete' d n | L.null dr && L.null kr       = deleteNode n
184            | not (L.null dr) && L.null kr = Just (mergeNode n (mapMaybe (delete' dr) (succ n)))
185            | otherwise                    = Just n
186            where (_, dr, kr) = split d (key n)
187
188-- | Merge a node with its successor if only one successor is left.
189mergeNode :: Trie a -> [Trie a] -> Trie a
190mergeNode (End k v _) t = End k v t
191mergeNode (Seq k _) [t] = if not (L.null k) then setKey (k ++ (key t)) t else Seq k [t]
192mergeNode (Seq k _) t   = Seq k t
193
194-- | Delete a node by either merging it with its successors or removing it completely.
195deleteNode :: Trie a -> Maybe (Trie a)
196deleteNode (End _ _ [])  = Nothing
197deleteNode (End k _ [t]) = Just (setKey (k ++ key t) t)
198deleteNode (End k _ t)   = Just (Seq k t)
199deleteNode n             = Just n
200
201-- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map,
202-- the value of @f key new_value old_value@ will be inserted.
203insertWithKey :: (Key -> a -> a -> a) -> Key -> a -> Trie a -> Trie a
204insertWithKey f nk nv n = insert' f nk nv nk n
205
206-- | /O(min(n,L))/ Insert with a combining function. If the key is already present in the map,
207-- the value of @f new_value old_value@ will be inserted.
208insertWith :: (a -> a -> a) -> Key -> a -> Trie a -> Trie a
209insertWith f nk nv n = insert' (\_ new old -> f new old) nk nv nk n
210
211-- | /O(min(n,L))/ Insert a new key and value into the map. If the key is already present in
212-- the map, the associated value will be replaced with the new value.
213insert :: Key -> a -> Trie a -> Trie a
214insert nk nv n = insertWith const nk nv n
215
216-- | The internal insert function which does the real work. The original new key has to
217-- be put through because otherwise it will be shortened on every recursive call.
218insert' :: (Key -> a -> a -> a) -> Key -> a -> Key -> Trie a -> Trie a
219insert' f nk nv ok n | L.null nk                    = error "Empty key!"
220                     -- Key already exists, the current value will be replaced with the new value.
221                     | L.null cr && L.null nr       = End s (maybe nv (f ok nv) (value n)) (succ n) 
222                     -- Insert into list of successors.
223                     | L.null cr && not (L.null nr) = setSucc (insertSub f nr nv ok (succ n)) n
224                     -- New intermediate End node with the new value and the current node with the
225                     -- remainder of the key as successor.
226                     | L.null nr && not (L.null cr) = End s nv [setKey cr n]
227                     -- New intermediate Seq node which shares the prefix of the new key and the
228                     -- key of the current node.
229                     | otherwise = Seq s [setKey cr n, (End nr nv [])]
230                     where (s, nr, cr) = split nk (key n)
231
232-- | Internal support function for insert which searches the correct successor to insert into
233-- within a list of nodes (the successors of the current node, see call in insert' above).
234insertSub :: (Key -> a -> a -> a) -> Key -> a -> Key -> [Trie a] -> [Trie a]
235insertSub f k v o t = insertSub' f k v t []
236  where
237    insertSub' :: (Key -> a -> a -> a) -> Key -> a -> [Trie a] -> [Trie a] -> [Trie a]
238    insertSub' _ nk nv [] r     = (End nk nv []):r
239    insertSub' cf nk nv (x:xs) r = if head (key x) == head nk then (insert' cf nk nv o x):r ++ xs else 
240                                  insertSub' cf nk nv xs (x:r)
241
242-- | Analyses two strings and splits them into three parts: A common prefix and both reminders
243splitBy :: (Key -> Key) -> Key -> Key -> (Key, Key, Key)
244splitBy f a b = splitBy' (f a) (f b) ([], [], [])
245  where
246    splitBy' :: Key -> Key -> (Key, Key, Key) -> (Key, Key, Key)
247    splitBy' n [] (p, nr, hr) = (p, nr ++ n, hr)
248    splitBy' [] h (p, nr, hr) = (p, nr, hr ++ h)
249    splitBy' (n:ns) (h:hs) (p, nr, hr) = if n == h then splitBy' ns hs (p ++ [n], nr, hr) 
250                                         else (p, n:ns, h:hs)
251
252-- | Simple split without any preprocessing.
253split :: Key -> Key -> (Key, Key, Key)
254split = splitBy id
255
256-- | /O(n)/ Returns all values.
257elems :: Trie a -> [a]
258elems t   = L.map snd (toList t)
259
260-- | /O(n)/ Creates a trie from a list of key\/value pairs.
261fromList :: [(Key, a)] -> Trie a
262fromList xs = L.foldl' (\p (k, v) -> insert k v p) empty xs
263
264-- | /O(n)/ Returns all elements as list of key value pairs,
265toList :: Trie a -> [(Key, a)]
266toList = foldWithKey (\k v r -> (k, v):r) []
267
268-- | /O(n)/ The number of elements.
269size :: Trie a -> Int
270size = fold (\_ r -> r + 1) 0
271
272-- | /O(max(L,R))/ Find all values where the string is a prefix of the key.
273prefixFind :: Key -> Trie a -> [a] 
274prefixFind q n = L.map snd (prefixFindInternal split q n)
275
276-- | /O(max(L,R))/ Find all values where the string is a prefix of the key and include the keys
277-- in the result.
278prefixFindWithKey :: Key -> Trie a -> [(Key, a)]
279prefixFindWithKey = prefixFindInternal split
280
281-- | /O(max(L,R))/ Same as 'prefixFind', but preprocesses the search key and every
282-- key in the map with @f@ before comparison.
283prefixFindBy :: (Key -> Key) -> Key -> Trie a -> [a]
284prefixFindBy f q n = L.map snd (prefixFindInternal (splitBy f) q n)
285
286-- | /O(max(L,R))/ Same as 'prefixFindWithKey', but preprocesses the search key and every
287-- key in the map with @f@ before comparison.
288prefixFindWithKeyBy :: (Key -> Key) -> Key -> Trie a -> [(Key, a)]
289prefixFindWithKeyBy f = prefixFindInternal (splitBy f)
290
291-- | Internal prefix find function which is used to implement every other prefix find function.
292prefixFindInternal :: (Key -> Key -> (Key, Key, Key)) -> Key -> Trie a -> [(Key, a)]
293prefixFindInternal f = prefixFindInternal' f []
294  where
295    prefixFindInternal' sf a p n | L.null pr = L.map (\(k, v) -> (a ++ k, v)) (toList n)
296                                 | L.null kr = concat (L.map (prefixFindInternal' sf (a ++ (key n)) pr) (succ n))
297                                 | otherwise = []
298                                 where (_, pr, kr) = sf p (key n)
299
300-- | /O(min(n,L))/ Find the value associated with a key. The function will @return@ the result in
301-- the monad or @fail@ in it if the key isn't in the map.
302lookup :: Monad m => Key -> Trie a -> m a
303lookup q n = case lookup' q n of
304             Just v -> return v
305             Nothing -> fail "Trie.lookup: Key not found"
306
307-- | Internal lookup function which is generalised for arbitrary monads above.
308lookup' :: Key -> Trie a -> Maybe a
309lookup' q n | L.null pr = if L.null kr then value n else Nothing
310            | L.null kr = let xs = (filter isJust (L.map (lookup pr) (succ n))) in
311                          if L.null xs then Nothing else head xs
312            | otherwise = Nothing
313            where (_, pr, kr) = split q (key n)
314
315-- | /O(max(L,R))/ Same as 'lookup', but preprocesses the search key and every
316-- key in the map with @f@ before comparison.
317lookupBy :: (Key -> Key) -> Key -> Trie a -> [a]
318lookupBy f q n | L.null pr = if L.null kr then maybeToList (value n) else []
319               | L.null kr = concat (L.map (lookupBy f pr) (succ n))
320               | otherwise = []
321               where (_, pr, kr) = splitBy f q (key n)
322
323-- | /O(min(n,L))/ Find the value associated with a key or return a default value if nothing
324-- was found.
325findWithDefault :: a -> Key -> Trie a -> a
326findWithDefault d q n = maybe d id (lookup q n)
327
328-- | /O(n)/ Fold over all key\/value pairs in the map.
329foldWithKey :: (Key -> a -> b -> b) -> b -> Trie a -> b
330foldWithKey f n m = fold' [] m n
331  where
332  fold' ck (End k v t) r = let nk = ck ++ k in foldr (fold' nk) (f nk v r) t
333  fold' ck (Seq k t) r   = let nk = ck ++ k in foldr (fold' nk) r t
334
335-- | /O(n)/ Fold over all values in the map.
336fold :: (a -> b -> b) -> b -> Trie a -> b
337fold f = foldWithKey (\_ v r -> f v r)
338
339-- | /O(n)/ Map over all key\/value pairs in the map.
340mapWithKey :: (Key -> a -> b) -> Trie a -> Trie b
341mapWithKey f m = map' [] m
342  where
343  map' ck (End k v t) = let nk = ck ++ k in End k (f nk v) (L.map (map' nk) t)
344  map' ck (Seq k t)   = let nk = ck ++ k in Seq k (L.map (map' nk) t)
345
346-- | /O(n)/ Map over all values in the map.
347map :: (a -> b) -> Trie a -> Trie b
348map f = mapWithKey (\_ v -> f v)
349
350-- | /O(n)/ Convert into an ordinary map.
351toMap :: Trie a -> M.Map Key a
352toMap = foldWithKey M.insert M.empty
353
354-- | /O(n)/ Convert an ordinary map into a StrMap.
355fromMap :: M.Map Key a -> Trie a
356fromMap = M.foldWithKey insert empty
357
358-- | /O(n)/ Calculate some statistics about the Trie for debugging purposes.