Ticket #3267: HaskTags.hs

File HaskTags.hs, 12.8 KB (added by vincent.berthoux, 5 years ago)

new hasktags file

Line 
1
2module Main (main) where
3
4import Char
5import List
6import IO
7import System.Environment
8import System.Console.GetOpt
9import System.Exit
10
11
12-- search for definitions of things
13-- we do this by looking for the following patterns:
14-- data XXX = ...      giving a datatype location
15-- newtype XXX = ...   giving a newtype location
16-- bla :: ...          giving a function location
17--
18-- by doing it this way, we avoid picking up local definitions
19--              (whether this is good or not is a matter for debate)
20--
21
22-- We generate both CTAGS and ETAGS format tags files
23-- The former is for use in most sensible editors, while EMACS uses ETAGS
24
25-- alternatives: http://haskell.org/haskellwiki/Tags
26
27main :: IO ()
28main = do
29        progName <- getProgName
30        args <- getArgs
31        let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
32        let (modes, filenames, errs) = getOpt Permute options args
33        if errs /= [] || elem Help modes || filenames == []
34         then do
35           putStr $ unlines errs
36           putStr $ usageInfo usageString options
37           exitWith (ExitFailure 1)
38         else return ()
39        let mode = getMode $ filter pureModeFilter modes
40            extendedCtag = ExtendedCtag `elem` modes
41        let openFileMode = if elem Append modes
42                           then AppendMode
43                           else WriteMode
44        filedata <- mapM findthings filenames
45        if mode == CTags
46         then do
47           ctagsfile <- getOutFile "tags" openFileMode modes
48           writectagsfile ctagsfile extendedCtag filedata
49           hClose ctagsfile
50         else return ()
51        if mode == ETags
52         then do
53           etagsfile <- getOutFile "TAGS" openFileMode modes
54           writeetagsfile etagsfile filedata
55           hClose etagsfile
56         else return ()
57        -- avoid problem when both is used in combination
58        -- with redirection on stdout
59        if mode == BothTags
60           then do
61            ctagsfile <- getOutFile "tags" openFileMode modes
62            writectagsfile ctagsfile extendedCtag filedata
63            etagsfile <- getOutFile "TAGS" openFileMode modes
64            writeetagsfile etagsfile filedata
65            hClose etagsfile
66            hClose ctagsfile
67           else return ()
68
69-- | Used to filter mode list to avoid problem using the getMode
70-- function, the OutRedir was messing with it, and append was already
71-- filtered-out.
72pureModeFilter :: Mode -> Bool
73pureModeFilter Append       = False
74pureModeFilter ExtendedCtag = False
75pureModeFilter (OutRedir _) = False
76pureModeFilter _            = True
77
78-- | getMode takes a list of modes and extract the mode with the
79--   highest precedence.  These are as follows: Both, CTags, ETags
80--   The default case is Both.
81getMode :: [Mode] -> Mode
82getMode [] = BothTags
83getMode [x] = x
84getMode (x:xs) = max x (getMode xs)
85
86-- | getOutFile scan the modes searching for output redirection
87--   if not found, open the file with name passed as parameter.
88--   Handle special file -, which is stdout
89getOutFile :: String -> IOMode -> [Mode] -> IO Handle
90getOutFile _           _        ((OutRedir "-"):_) = return stdout
91getOutFile _           openMode ((OutRedir f):_)   = openFile f openMode
92getOutFile name        openMode (x:xs)             = getOutFile name openMode xs
93getOutFile defaultName openMode []                 = openFile defaultName openMode
94
95data Mode = ExtendedCtag
96          | OutRedir String 
97          | ETags 
98          | CTags 
99          | BothTags 
100          | Append 
101          | Help
102          deriving (Ord, Eq, Show)
103
104options :: [OptDescr Mode]
105options = [ Option "c" ["ctags"]
106            (NoArg CTags) "generate CTAGS file (ctags)"
107          , Option "e" ["etags"]
108            (NoArg ETags) "generate ETAGS file (etags)"
109          , Option "b" ["both"]
110            (NoArg BothTags) ("generate both CTAGS and ETAGS")
111          , Option "a" ["append"]
112            (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)")
113          , Option "o" ["output"]
114            (ReqArg (OutRedir) "") ("output to given file, instead of 'tags', '-' file is stdout")
115          , Option "f" ["file"]
116            (ReqArg (OutRedir) "") ("same as -o, but used as compatibility with ctags")
117          , Option "x" ["extendedctag"]
118            (NoArg ExtendedCtag) ("Generate additional information in ctag file.")
119          , Option "h" ["help"] (NoArg Help) "This help"
120          ]
121
122type FileName = String
123
124type ThingName = String
125
126-- The position of a token or definition
127data Pos = Pos
128                FileName -- file name
129                Int      -- line number
130                Int      -- token number
131                String   -- string that makes up that line
132    deriving (Show, Eq)
133
134data ThingKind =
135      KindClass
136    | KindModule
137    | KindData
138    | KindType
139    | KindNewtype
140    | KindVal
141    | KindConstructor
142    deriving (Show, Eq)
143
144-- A definition we have found
145data FoundThing = FoundThing ThingName ThingKind Pos
146    deriving (Show, Eq)
147
148-- Data we have obtained from a file
149data FileData = FileData FileName [FoundThing]
150
151data Token = Token String Pos
152    deriving Show
153
154
155-- stuff for dealing with ctags output format
156
157writectagsfile :: Handle -> Bool -> [FileData] -> IO ()
158writectagsfile ctagsfile extended filedata = do
159    let things = concat $ map getfoundthings filedata
160    if extended
161       then do
162        hPutStrLn ctagsfile "!_TAG_FILE_FORMAT\t2\t/extended format; --format=1 will not append ;\" to lines/"
163        hPutStrLn ctagsfile "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/"
164        hPutStrLn ctagsfile "!_TAG_PROGRAM_NAME\thasktags //"
165       else return ()
166    mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing extended x) (sortThings things)
167
168sortThings :: [FoundThing] -> [FoundThing]
169sortThings = sortBy (\(FoundThing a _ _) (FoundThing b _ _) -> compare a b)
170
171getfoundthings :: FileData -> [FoundThing]
172getfoundthings (FileData _ things) = things
173
174-- | Associate kind with a letter to be outputed
175-- in an extended ctags file
176kindLetter :: ThingKind -> String
177kindLetter KindClass = "C"
178kindLetter KindModule = "m"
179kindLetter KindData = "d"
180kindLetter KindType = "t"
181kindLetter KindNewtype = "n"
182kindLetter KindVal = "v"
183kindLetter KindConstructor = "c"
184
185-- | Dump found tag in normal or extended (read : vim like) ctag
186-- line
187dumpthing :: Bool -> FoundThing -> String
188dumpthing False (FoundThing name kind (Pos filename line _ _)) =
189    name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1)
190dumpthing True (FoundThing name kind (Pos filename line _ lineText)) =
191    name ++ "\t" ++ filename
192         ++ "\t/^" ++ (concat $ map ctagEncode lineText)
193         ++ "$/;\"\t" ++ (kindLetter kind)
194         ++ "\tline:" ++ (show $ line + 1)
195
196ctagEncode :: Char -> String
197ctagEncode '/' = '\\' : '/' : []
198ctagEncode '\\' = '\\' : '\\' : []
199ctagEncode a = [a]
200
201-- stuff for dealing with etags output format
202
203writeetagsfile :: Handle -> [FileData] -> IO ()
204writeetagsfile etagsfile filedata = do
205    mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata
206
207e_dumpfiledata :: FileData -> String
208e_dumpfiledata (FileData filename things) =
209    "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump
210    where thingsdump = concat $ map e_dumpthing things
211          thingslength = length thingsdump
212
213e_dumpthing :: FoundThing -> String
214e_dumpthing (FoundThing _ _ (Pos _ line token fullline)) =
215    (concat $ take (token + 1) $ spacedwords fullline)
216 ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n"
217
218
219-- like "words", but keeping the whitespace, and so letting us build
220-- accurate prefixes
221
222spacedwords :: String -> [String]
223spacedwords [] = []
224spacedwords xs = (blanks ++ wordchars):(spacedwords rest2)
225    where (blanks,rest) = span Char.isSpace xs
226          (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest
227
228
229-- Find the definitions in a file
230
231findthings :: FileName -> IO FileData
232findthings filename = do
233    text <- readFile filename
234    evaluate text -- forces evaluation of text
235                  -- too many files were being opened otherwise since
236                  -- readFile is lazy
237    let aslines = lines text
238    let wordlines = map mywords aslines
239    let noslcoms = map stripslcomments wordlines
240    let tokens = concat $ zipWith3 (withline filename) noslcoms aslines [0 ..]
241    -- there are some tokens with "" (don't know why yet) this filter fixes it
242    let tokens' = filter (\(Token s _ ) -> (not .  null) s ) tokens
243    let nocoms = stripblockcomments tokens'
244    -- using nub because getcons and findstuff are parsing parts of the file twice
245    return $ FileData filename $ nub $ findstuff nocoms
246  where evaluate [] = return ()
247        evaluate (c:cs) = c `seq` evaluate cs
248        -- my words is mainly copied from Data.List.
249        -- difference abc::def is split into three words instead of one.
250        -- We should really be lexing Haskell properly here rather
251        -- than using hacks like this. In the future we expect hasktags
252        -- to be replaced by something using the GHC API.
253        mywords :: String -> [String]
254        mywords (':':':':xs) = "::" : mywords xs
255        mywords s =  case dropWhile isSpace s of
256                         "" -> []
257                         s' -> w : mywords s''
258                             where (w, s'') = myBreak s'
259                                   myBreak [] = ([],[])
260                                   myBreak (':':':':xs) = ([], "::"++xs)
261                                   myBreak (' ':xs) = ([],xs);
262                                   myBreak (x:xs) = let (a,b) = myBreak xs
263                                                    in  (x:a,b)
264
265-- Create tokens from words, by recording their line number
266-- and which token they are through that line
267
268withline :: FileName -> [String] -> String -> Int -> [Token]
269withline filename theWords fullline i =
270    zipWith (\w t -> Token w (Pos filename i t fullline)) theWords $ [0 ..]
271
272-- comments stripping
273
274stripslcomments :: [String] -> [String]
275stripslcomments ("--" : _) = []
276stripslcomments (x : xs) = x : stripslcomments xs
277stripslcomments [] = []
278
279stripblockcomments :: [Token] -> [Token]
280stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs
281stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs
282stripblockcomments (x:xs) = x:stripblockcomments xs
283stripblockcomments [] = []
284
285afterlitend :: [Token] -> [Token]
286afterlitend (Token "\\begin{code}" _ : xs) = xs
287afterlitend (_ : xs) = afterlitend xs
288afterlitend [] = []
289
290afterblockcomend :: [Token] -> [Token]
291afterblockcomend ((Token token _):xs)
292 | contains "-}" token = xs
293 | otherwise           = afterblockcomend xs
294afterblockcomend [] = []
295
296
297-- does one string contain another string
298
299contains :: Eq a => [a] -> [a] -> Bool
300contains sub full = any (isPrefixOf sub) $ tails full
301
302-- actually pick up definitions
303
304findstuff :: [Token] -> [FoundThing]
305findstuff ((Token "module" _):(Token name pos):xs) =
306    FoundThing name KindModule pos : (getcons xs) ++ (findstuff xs)
307findstuff ((Token "data" _):(Token name pos):xs) =
308    FoundThing name KindData pos : (getcons xs) ++ (findstuff xs)
309findstuff ((Token "newtype" _):(Token name pos):xs) =
310    FoundThing name KindNewtype pos : findstuff xs
311findstuff ((Token "type" _):(Token name pos):xs) =
312    FoundThing name KindType pos : findstuff xs
313findstuff ((Token "class" _):xs) = findClassName xs
314findstuff ((Token name pos):(Token "::" _):xs) =
315    FoundThing name KindVal pos : findstuff xs
316findstuff (_ : xs) = findstuff xs
317findstuff [] = []
318
319findClassName :: [Token] -> [FoundThing]
320findClassName []  = []
321findClassName [Token n p]  = [FoundThing n KindClass p]
322findClassName xs = (\(Token n pos : xs') -> FoundThing n KindClass pos : findstuff xs') . drop2 . dropParens 0 $ xs
323
324dropParens :: Integer -> [Token] -> [Token]
325dropParens n (Token "(" _ : xs) = dropParens (n + 1) xs
326dropParens 0 (x           : xs) = x : xs
327dropParens 1 (Token ")" _ : xs) = xs
328dropParens n (Token ")" _ : xs) = dropParens (n - 1) xs
329dropParens n (_           : xs) = dropParens n xs
330dropParens _ []                 = [] -- Shouldn't happen on correct source
331
332-- dropsEverything till token "=>" (if it is on the same line as the
333-- first token. if not return tokens)
334drop2 :: [Token] -> [Token]
335drop2 tokens@(Token _ (Pos _ line_nr _ _ ) : _) =
336  let (line, following) = span (\(Token _ (Pos _ l _ _)) -> l == line_nr) tokens
337      (_, following_in_line) = span (\(Token n _) -> n /= "=>") line
338  in case following_in_line of
339          (Token "=>" _ : xs) -> xs ++ following
340          _ -> tokens
341drop2 xs = xs
342
343-- get the constructor definitions, knowing that a datatype has just started
344
345getcons :: [Token] -> [FoundThing]
346getcons (Token "=" _ : Token name pos : xs) =
347    FoundThing name KindConstructor pos : getcons2 xs
348getcons (_ : xs) = getcons xs
349getcons [] = []
350
351getcons2 :: [Token] -> [FoundThing]
352getcons2 (Token "=" _ : _) = []
353getcons2 (Token "|" _ : Token name pos : xs) =
354    FoundThing name KindConstructor pos : getcons2 xs
355getcons2 (_:xs) = getcons2 xs
356getcons2 [] = []
357