module Language.Haskell.HsColour.Anchors
  ( insertAnchors
  ) where

import Language.Haskell.HsColour.Classify
import Language.Haskell.HsColour.General
import Data.List
import Data.Char (isUpper, isLower, isDigit, ord, intToDigit)

-- This is an attempt to find the first defining occurrence of an
-- identifier (function, datatype, class) in a Haskell source file.
-- Rather than parse the module properly, we try to get by with just
-- a finite state automaton.  Keeping a record of identifiers we
-- have already seen, we look at the beginning of every line to see
-- if it starts with the right tokens to introduce a defn.  If so,
-- we look a little bit further until we can be certain.  Then plonk
-- (or not) an anchor at the beginning of the line.

type Anchor = String

-- | 'insertAnchors' places an anchor marker in the token stream before the
--   first defining occurrence of any identifier.  Here, /before/ means
--   immediately preceding its type signature, or preceding a (haddock)
--   comment that comes immediately before the type signature, or failing
--   either of those, before the first equation.
insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)]
insertAnchors :: [(TokenType, String)] -> [Either String (TokenType, String)]
insertAnchors = ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ST
emptyST

-- looks at first token in the left-most position of each line
-- precondition: have just seen a newline token.
anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor st :: ST
st s :: [(TokenType, String)]
s = case ST -> [(TokenType, String)] -> Maybe String
identifier ST
st [(TokenType, String)]
s of
                Nothing -> ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ST
st [(TokenType, String)]
s
                Just v :: String
v  -> String -> Either String (TokenType, String)
forall a b. a -> Either a b
Left (String -> String
escape String
v)Either String (TokenType, String)
-> [Either String (TokenType, String)]
-> [Either String (TokenType, String)]
forall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit (String -> ST -> ST
insertST String
v ST
st) [(TokenType, String)]
s

-- some chars are not valid in anchor URIs: http://www.ietf.org/rfc/rfc3986
-- NOTE: This code assumes characters are 8-bit.
--       Ideally, it should transcode to utf8 octets first.
escape :: String -> String
escape :: String -> String
escape = (Char -> String) -> String -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Char -> String
enc
    where enc :: Char -> String
enc x :: Char
x | Char -> Bool
isDigit Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isURIFragmentValid Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isLower Char
x
                Bool -> Bool -> Bool
|| Char -> Bool
isUpper Char
x = [Char
x]
                | Char -> Int
ord Char
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 256 = [Char
x] -- not correct, but better than nothing
                | Bool
otherwise  = ['%',Int -> Char
hexHi (Char -> Int
ord Char
x), Int -> Char
hexLo (Char -> Int
ord Char
x)]
          hexHi :: Int -> Char
hexHi d :: Int
d = Int -> Char
intToDigit (Int
dInt -> Int -> Int
forall a. Integral a => a -> a -> a
`div`16)
          hexLo :: Int -> Char
hexLo d :: Int
d = Int -> Char
intToDigit (Int
dInt -> Int -> Int
forall a. Integral a => a -> a -> a
`mod`16)
          isURIFragmentValid :: Char -> Bool
isURIFragmentValid x :: Char
x = Char
x Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` "!$&'()*+,;=/?-._~:@"

-- emit passes stuff through until the next newline has been encountered,
-- then jumps back into the anchor function
-- pre-condition: newlines are explicitly single tokens
emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit st :: ST
st (t :: (TokenType, String)
t@(Space,"\n"):stream :: [(TokenType, String)]
stream) = (TokenType, String) -> Either String (TokenType, String)
forall a b. b -> Either a b
Right (TokenType, String)
tEither String (TokenType, String)
-> [Either String (TokenType, String)]
-> [Either String (TokenType, String)]
forall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ST
st [(TokenType, String)]
stream
emit st :: ST
st (t :: (TokenType, String)
t:stream :: [(TokenType, String)]
stream)              = (TokenType, String) -> Either String (TokenType, String)
forall a b. b -> Either a b
Right (TokenType, String)
tEither String (TokenType, String)
-> [Either String (TokenType, String)]
-> [Either String (TokenType, String)]
forall a. a -> [a] -> [a]
: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ST
st [(TokenType, String)]
stream
emit _  []                      = []

-- Given that we are at the beginning of a line, determine whether there
-- is an identifier defined here, and if so, return it.
-- precondition: have just seen a newline token.
identifier ::  ST -> [(TokenType, String)] -> Maybe String
identifier :: ST -> [(TokenType, String)] -> Maybe String
identifier st :: ST
st t :: [(TokenType, String)]
t@((kind :: TokenType
kind,v :: String
v):stream :: [(TokenType, String)]
stream) | TokenType
kindTokenType -> [TokenType] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`[TokenType
Varid,TokenType
Definition] =
    case [(TokenType, String)] -> [(TokenType, String)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
stream of
        ((Varop,v :: String
v):_) | Bool -> Bool
not (String
vString -> ST -> Bool
`inST`ST
st) -> String -> Maybe String
forall a. a -> Maybe a
Just (String -> String
fix String
v)
        notVarop :: [(TokenType, String)]
notVarop  --  | typesig stream  -> Nothing    -- not a defn
                      | String
v String -> ST -> Bool
`inST` ST
st     -> Maybe String
forall a. Maybe a
Nothing    -- already defined
                      | Bool
otherwise       -> String -> Maybe String
forall a. a -> Maybe a
Just String
v
identifier st :: ST
st t :: [(TokenType, String)]
t@((Layout,"("):stream :: [(TokenType, String)]
stream) =
    case [(TokenType, String)]
stream of
      ((Varop,v :: String
v):(Layout,")"):_)
                  --  | typesig stream  -> Nothing
	              | String
v String -> ST -> Bool
`inST` ST
st     -> Maybe String
forall a. Maybe a
Nothing
	              | Bool
otherwise	-> String -> Maybe String
forall a. a -> Maybe a
Just (String -> String
fix String
v)
      notVarop :: [(TokenType, String)]
notVarop -> case [(TokenType, String)] -> [(TokenType, String)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip ([(TokenType, String)] -> [(TokenType, String)]
munchParens [(TokenType, String)]
stream) of
          ((Varop,v :: String
v):_) | Bool -> Bool
not (String
vString -> ST -> Bool
`inST`ST
st) -> String -> Maybe String
forall a. a -> Maybe a
Just (String -> String
fix String
v)
          _             -> Maybe String
forall a. Maybe a
Nothing
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"foreign"):stream :: [(TokenType, String)]
stream) = Maybe String
forall a. Maybe a
Nothing -- not yet implemented
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"data"):(Space,_):(Keyword,"family"):stream :: [(TokenType, String)]
stream)
                                             = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"data"):stream :: [(TokenType, String)]
stream)    = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"newtype"):stream :: [(TokenType, String)]
stream) = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"type"):(Space,_):(Keyword,"family"):stream :: [(TokenType, String)]
stream)
                                             = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"type"):stream :: [(TokenType, String)]
stream)    = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"class"):stream :: [(TokenType, String)]
stream)   = [(TokenType, String)] -> Maybe String
getConid [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Keyword,"instance"):stream :: [(TokenType, String)]
stream)= [(TokenType, String)] -> Maybe String
getInstance [(TokenType, String)]
stream
identifier st :: ST
st t :: [(TokenType, String)]
t@((Comment,_):(Space,"\n"):stream :: [(TokenType, String)]
stream) = ST -> [(TokenType, String)] -> Maybe String
identifier ST
st [(TokenType, String)]
stream
identifier st :: ST
st stream :: [(TokenType, String)]
stream = Maybe String
forall a. Maybe a
Nothing

-- Is this really a type signature?  (no longer used)
typesig :: [(TokenType,String)] -> Bool
typesig :: [(TokenType, String)] -> Bool
typesig ((Keyglyph,"::"):_)   = Bool
True
typesig ((Varid,_):stream :: [(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((Layout,"("):(Varop,_):(Layout,")"):stream :: [(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((Layout,","):stream :: [(TokenType, String)]
stream) = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((Space,_):stream :: [(TokenType, String)]
stream)    = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig ((Comment,_):stream :: [(TokenType, String)]
stream)  = [(TokenType, String)] -> Bool
typesig [(TokenType, String)]
stream
typesig _                     = Bool
False

-- throw away everything from opening paren to matching close
munchParens ::  [(TokenType, String)] -> [(TokenType, String)]
munchParens :: [(TokenType, String)] -> [(TokenType, String)]
munchParens =  Int -> [(TokenType, String)] -> [(TokenType, String)]
forall a.
(Eq a, Num a) =>
a -> [(TokenType, String)] -> [(TokenType, String)]
munch (0::Int)	-- already seen open paren
  where munch :: a -> [(TokenType, String)] -> [(TokenType, String)]
munch 0 ((Layout,")"):rest :: [(TokenType, String)]
rest) = [(TokenType, String)]
rest
        munch n :: a
n ((Layout,")"):rest :: [(TokenType, String)]
rest) = a -> [(TokenType, String)] -> [(TokenType, String)]
munch (a
na -> a -> a
forall a. Num a => a -> a -> a
-1) [(TokenType, String)]
rest
        munch n :: a
n ((Layout,"("):rest :: [(TokenType, String)]
rest) = a -> [(TokenType, String)] -> [(TokenType, String)]
munch (a
na -> a -> a
forall a. Num a => a -> a -> a
+1) [(TokenType, String)]
rest
        munch n :: a
n (_:rest :: [(TokenType, String)]
rest)            = a -> [(TokenType, String)] -> [(TokenType, String)]
munch a
n [(TokenType, String)]
rest
        munch _ []                  = []	-- source is ill-formed

-- ensure anchor name is correct for a Varop
fix ::  String -> String
fix :: String -> String
fix ('`':v :: String
v) = Char -> String -> String
forall a. Eq a => a -> [a] -> [a]
dropLast '`' String
v
fix v :: String
v       = String
v

-- look past whitespace and comments to next "real" token
skip ::  [(TokenType, t)] -> [(TokenType, t)]
skip :: [(TokenType, t)] -> [(TokenType, t)]
skip ((Space,_):stream :: [(TokenType, t)]
stream)   = [(TokenType, t)] -> [(TokenType, t)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, t)]
stream
skip ((Comment,_):stream :: [(TokenType, t)]
stream) = [(TokenType, t)] -> [(TokenType, t)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, t)]
stream
skip stream :: [(TokenType, t)]
stream               = [(TokenType, t)]
stream

-- skip possible context up to and including "=>", returning next Conid token
-- (this function is highly partial - relies on source being parse-correct)
getConid ::  [(TokenType, String)] -> Maybe String
getConid :: [(TokenType, String)] -> Maybe String
getConid stream :: [(TokenType, String)]
stream =
    case [(TokenType, String)] -> [(TokenType, String)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
stream of
        ((Conid,c :: String
c):rest :: [(TokenType, String)]
rest) -> case [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
rest of
                              ((Keyglyph,"="):_)     -> String -> Maybe String
forall a. a -> Maybe a
Just String
c
                              ((Keyglyph,"=>"):more :: [(TokenType, String)]
more) ->
                                  case [(TokenType, String)] -> [(TokenType, String)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
more of
                                      ((Conid,c' :: String
c'):_) -> String -> Maybe String
forall a. a -> Maybe a
Just String
c'
                                      v :: [(TokenType, String)]
v -> [(TokenType, String)] -> String -> Maybe String
forall p p a. p -> p -> Maybe a
debug [(TokenType, String)]
v ("Conid "String -> String -> String
forall a. [a] -> [a] -> [a]
++String
cString -> String -> String
forall a. [a] -> [a] -> [a]
++" =>")
                              v :: [(TokenType, String)]
v -> [(TokenType, String)] -> String -> Maybe String
forall p p a. p -> p -> Maybe a
debug [(TokenType, String)]
v ("Conid "String -> String -> String
forall a. [a] -> [a] -> [a]
++String
cString -> String -> String
forall a. [a] -> [a] -> [a]
++" no = or =>")
        ((Layout,"("):rest :: [(TokenType, String)]
rest) -> case [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
rest of
                                   ((Keyglyph,"=>"):more :: [(TokenType, String)]
more) ->
                                       case [(TokenType, String)] -> [(TokenType, String)]
forall t. [(TokenType, t)] -> [(TokenType, t)]
skip [(TokenType, String)]
more of
                                           ((Conid,c' :: String
c'):_) -> String -> Maybe String
forall a. a -> Maybe a
Just String
c'
                                           v :: [(TokenType, String)]
v -> [(TokenType, String)] -> String -> Maybe String
forall p p a. p -> p -> Maybe a
debug [(TokenType, String)]
v ("(...) =>")
                                   v :: [(TokenType, String)]
v -> [(TokenType, String)] -> String -> Maybe String
forall p p a. p -> p -> Maybe a
debug [(TokenType, String)]
v ("(...) no =>")
        v :: [(TokenType, String)]
v -> [(TokenType, String)] -> String -> Maybe String
forall p p a. p -> p -> Maybe a
debug [(TokenType, String)]
v ("no Conid or (...)")
    where debug :: p -> p -> Maybe a
debug   _   _ = Maybe a
forall a. Maybe a
Nothing
       -- debug (s:t) c = error ("HsColour: getConid failed: "++show s
       --                       ++"\n  in the context of: "++c)

-- jump past possible class context
context ::  [(TokenType, String)] -> [(TokenType, String)]
context :: [(TokenType, String)] -> [(TokenType, String)]
context stream :: [(TokenType, String)]
stream@((Keyglyph,"="):_) = [(TokenType, String)]
stream
context stream :: [(TokenType, String)]
stream@((Keyglyph,"=>"):_) = [(TokenType, String)]
stream
context stream :: [(TokenType, String)]
stream@((Keyglyph,"⇒"):_) = [(TokenType, String)]
stream
context (_:stream :: [(TokenType, String)]
stream) = [(TokenType, String)] -> [(TokenType, String)]
context [(TokenType, String)]
stream
context [] = []

-- the anchor name for an instance is just the entire instance head, minus
-- any extra context clause
getInstance :: [(TokenType, String)] -> Maybe String
getInstance = String -> Maybe String
forall a. a -> Maybe a
Just (String -> Maybe String)
-> ([(TokenType, String)] -> String)
-> [(TokenType, String)]
-> Maybe String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ST -> String
unwords (ST -> String)
-> ([(TokenType, String)] -> ST) -> [(TokenType, String)] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ("instance"String -> ST -> ST
forall a. a -> [a] -> [a]
:) (ST -> ST)
-> ([(TokenType, String)] -> ST) -> [(TokenType, String)] -> ST
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ST
words (String -> ST)
-> ([(TokenType, String)] -> String) -> [(TokenType, String)] -> ST
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ST -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (ST -> String)
-> ([(TokenType, String)] -> ST) -> [(TokenType, String)] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((TokenType, String) -> String) -> [(TokenType, String)] -> ST
forall a b. (a -> b) -> [a] -> [b]
map (TokenType, String) -> String
forall a b. (a, b) -> b
snd
              ([(TokenType, String)] -> ST)
-> ([(TokenType, String)] -> [(TokenType, String)])
-> [(TokenType, String)]
-> ST
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(TokenType, String)] -> [(TokenType, String)]
trimContext ([(TokenType, String)] -> [(TokenType, String)])
-> ([(TokenType, String)] -> [(TokenType, String)])
-> [(TokenType, String)]
-> [(TokenType, String)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((TokenType, String) -> Bool)
-> [(TokenType, String)] -> [(TokenType, String)]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Bool -> Bool
not (Bool -> Bool)
-> ((TokenType, String) -> Bool) -> (TokenType, String) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TokenType, String) -> Bool
terminator)
  where
    trimContext :: [(TokenType, String)] -> [(TokenType, String)]
trimContext ts :: [(TokenType, String)]
ts = if (TokenType
Keyglyph,"=>") (TokenType, String) -> [(TokenType, String)] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [(TokenType, String)]
ts
                     Bool -> Bool -> Bool
||  (TokenType
Keyglyph,"⇒") (TokenType, String) -> [(TokenType, String)] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [(TokenType, String)]
ts
                     then [(TokenType, String)] -> [(TokenType, String)]
forall a. [a] -> [a]
tail ([(TokenType, String)] -> [(TokenType, String)])
-> ([(TokenType, String)] -> [(TokenType, String)])
-> [(TokenType, String)]
-> [(TokenType, String)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((TokenType, String) -> Bool)
-> [(TokenType, String)] -> [(TokenType, String)]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile ((TokenType, String) -> [(TokenType, String)] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem`[(TokenType
Keyglyph,"=>")
                                                     ,(TokenType
Keyglyph,"⇒")]) ([(TokenType, String)] -> [(TokenType, String)])
-> [(TokenType, String)] -> [(TokenType, String)]
forall a b. (a -> b) -> a -> b
$ [(TokenType, String)]
ts
                     else [(TokenType, String)]
ts
    terminator :: (TokenType, String) -> Bool
terminator (Keyword, _)   = Bool
True
    terminator (Comment, _)   = Bool
True
    terminator (Cpp,     _)   = Bool
True
    terminator (Keyglyph,"|") = Bool
True
    terminator (Layout,  ";") = Bool
True
    terminator (Layout,  "{") = Bool
True
    terminator (Layout,  "}") = Bool
True
    terminator _              = Bool
False

-- simple implementation of a string lookup table.
-- replace this with something more sophisticated if needed.
type ST = [String]

emptyST :: ST
emptyST :: ST
emptyST = []

insertST :: String -> ST -> ST
insertST :: String -> ST -> ST
insertST k :: String
k st :: ST
st = String -> ST -> ST
forall a. Ord a => a -> [a] -> [a]
insert String
k ST
st

inST :: String -> ST -> Bool
inST :: String -> ST -> Bool
inST k :: String
k st :: ST
st = String
k String -> ST -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ST
st