Use hoogle v5. (#953)

Hoogle v5 uses a different json representation for the data, requires a
new URL and also contains HTML markup inside the json fields.

This commit removes all markup to be able to handle the hoogle responses
using the same structure as with the previous response structure (from
hoogle v4).
This commit is contained in:
Vaibhav Sagar 2018-11-02 10:27:23 -04:00 committed by GitHub
commit 3f77626c36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1532 additions and 520 deletions

View File

@ -164,6 +164,7 @@ Test-Suite hspec
IHaskell.Test.Completion IHaskell.Test.Completion
IHaskell.Test.Util IHaskell.Test.Util
IHaskell.Test.Parser IHaskell.Test.Parser
IHaskell.Test.Hoogle
default-language: Haskell2010 default-language: Haskell2010
build-depends: build-depends:
base, base,
@ -178,6 +179,7 @@ Test-Suite hspec
directory, directory,
text, text,
shelly, shelly,
raw-strings-qq,
setenv setenv
source-repository head source-repository head

File diff suppressed because one or more lines are too long

View File

@ -1,24 +1,31 @@
{-# LANGUAGE NoImplicitPrelude, FlexibleInstances, OverloadedStrings #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module IHaskell.Eval.Hoogle ( module IHaskell.Eval.Hoogle (
search, search,
document, document,
render, render,
OutputFormat(..), OutputFormat(..),
HoogleResult, HoogleResult(..),
HoogleResponse(..),
parseResponse,
) where ) where
import qualified Data.ByteString.Char8 as CBS
import qualified Data.ByteString.Lazy as LBS
import Data.Either (either)
import IHaskellPrelude import IHaskellPrelude
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Char8 as CBS
import Data.Aeson
import Data.Char (isAlphaNum, isAscii)
import qualified Data.List as List
import qualified Data.Text as T
import Data.Vector (toList)
import Network.HTTP.Client import Network.HTTP.Client
import Network.HTTP.Client.TLS import Network.HTTP.Client.TLS
import Data.Aeson
import qualified Data.List as List
import Data.Char (isAscii, isAlphaNum)
import StringUtils (split, strip, replace) import StringUtils (replace, split, strip)
-- | Types of formats to render output to. -- | Types of formats to render output to.
data OutputFormat = Plain -- ^ Render to plain text. data OutputFormat = Plain -- ^ Render to plain text.
@ -35,17 +42,19 @@ data HoogleResult = SearchResult HoogleResponse
data HoogleResponseList = HoogleResponseList [HoogleResponse] data HoogleResponseList = HoogleResponseList [HoogleResponse]
instance FromJSON HoogleResponseList where instance FromJSON HoogleResponseList where
parseJSON (Object obj) = do parseJSON (Array arr) =
results <- obj .: "results" HoogleResponseList <$> mapM parseJSON (toList arr)
HoogleResponseList <$> mapM parseJSON results
parseJSON _ = fail "Expected object with 'results' field." parseJSON _ = fail "Expected array."
instance FromJSON HoogleResponse where instance FromJSON HoogleResponse where
parseJSON (Object obj) = parseJSON (Object obj) =
HoogleResponse <$> obj .: "location" <*> obj .: "self" <*> obj .: "docs" HoogleResponse
<$> obj .: "url"
<*> (removeMarkup <$> obj .: "item")
<*> obj .: "docs"
parseJSON _ = fail "Expected object with fields: location, self, docs" parseJSON _ = fail "Expected object with fields: url, item, docs"
-- | Query Hoogle for the given string. This searches Hoogle using the internet. It returns either -- | Query Hoogle for the given string. This searches Hoogle using the internet. It returns either
-- an error message or the successful JSON result. -- an error message or the successful JSON result.
@ -59,7 +68,7 @@ query str = do
where where
queryUrl :: String -> String queryUrl :: String -> String
queryUrl = printf "https://www.haskell.org/hoogle/?hoogle=%s&mode=json" queryUrl = printf "http://hoogle.haskell.org/?hoogle=%s&mode=json"
-- | Copied from the HTTP package. -- | Copied from the HTTP package.
urlEncode :: String -> String urlEncode :: String -> String
@ -87,18 +96,16 @@ urlEncode (ch:t)
-- | Search for a query on Hoogle. Return all search results. -- | Search for a query on Hoogle. Return all search results.
search :: String -> IO [HoogleResult] search :: String -> IO [HoogleResult]
search string = do search string = either ((:[]) . NoResult) parseResponse <$> query string
response <- query string
return $ parseResponse :: String -> [HoogleResult]
case response of parseResponse jsn =
Left err -> [NoResult err] case eitherDecode $ LBS.fromStrict $ CBS.pack jsn of
Right jsn -> Left err -> [NoResult err]
case eitherDecode $ LBS.fromStrict $ CBS.pack jsn of Right results ->
Left err -> [NoResult err] case map SearchResult $ (\(HoogleResponseList l) -> l) results of
Right results -> [] -> [NoResult "no matching identifiers found."]
case map SearchResult $ (\(HoogleResponseList l) -> l) results of res -> res
[] -> [NoResult "no matching identifiers found."]
res -> res
-- | Look up an identifier on Hoogle. Return documentation for that identifier. If there are many -- | Look up an identifier on Hoogle. Return documentation for that identifier. If there are many
-- identifiers, include documentation for all of them. -- identifiers, include documentation for all of them.
@ -118,13 +125,13 @@ document string = do
matches _ = False matches _ = False
toDocResult (SearchResult resp) = Just $ DocResult resp toDocResult (SearchResult resp) = Just $ DocResult resp
toDocResult (DocResult _) = Nothing toDocResult (DocResult _) = Nothing
toDocResult (NoResult _) = Nothing toDocResult (NoResult _) = Nothing
-- | Render a Hoogle search result into an output format. -- | Render a Hoogle search result into an output format.
render :: OutputFormat -> HoogleResult -> String render :: OutputFormat -> HoogleResult -> String
render Plain = renderPlain render Plain = renderPlain
render HTML = renderHtml render HTML = renderHtml
-- | Render a Hoogle result to plain text. -- | Render a Hoogle result to plain text.
renderPlain :: HoogleResult -> String renderPlain :: HoogleResult -> String
@ -181,6 +188,12 @@ renderSelf string loc
span "hoogle-class" (link loc $ extractNewtype string) ++ span "hoogle-class" (link loc $ extractNewtype string) ++
packageSub package packageSub package
| "type" `isPrefixOf` string =
let package = extractPackageName loc
in nwt ++ " " ++
span "hoogle-class" (link loc $ extractType string) ++
packageSub package
| otherwise = | otherwise =
let [name, args] = split "::" string let [name, args] = split "::" string
package = extractPackageName loc package = extractPackageName loc
@ -197,6 +210,7 @@ renderSelf string loc
extractClass = strip . replace "class" "" extractClass = strip . replace "class" ""
extractData = strip . replace "data" "" extractData = strip . replace "data" ""
extractNewtype = strip . replace "newtype" "" extractNewtype = strip . replace "newtype" ""
extractType = strip . replace "newtype" ""
pkg = span "hoogle-head" "package" pkg = span "hoogle-head" "package"
mdl = span "hoogle-head" "module" mdl = span "hoogle-head" "module"
cls = span "hoogle-head" "class" cls = span "hoogle-head" "class"
@ -223,21 +237,7 @@ renderSelf string loc
", " ++ mdl ++ " " ++ span "hoogle-module" modname ++ ")" ", " ++ mdl ++ " " ++ span "hoogle-module" modname ++ ")"
renderDocs :: String -> String renderDocs :: String -> String
renderDocs doc = renderDocs doc = div' "hoogle-doc" doc
let groups = List.groupBy bothAreCode $ lines doc
nonull = filter (not . null . strip)
bothAreCode s1 s2 =
isPrefixOf ">" (strip s1) &&
isPrefixOf ">" (strip s2)
isCode xs =
case xs of
[] -> False
(s:_) -> isPrefixOf ">" $ strip s
makeBlock xs =
if isCode xs
then div' "hoogle-code" $ unlines $ nonull xs
else div' "hoogle-text" $ unlines $ nonull xs
in div' "hoogle-doc" $ unlines $ map makeBlock groups
extractPackageName :: String -> Maybe String extractPackageName :: String -> Maybe String
extractPackageName lnk = do extractPackageName lnk = do
@ -259,3 +259,19 @@ span = printf "<span class='%s'>%s</span>"
link :: String -> String -> String link :: String -> String -> String
link = printf "<a target='_blank' href='%s'>%s</a>" link = printf "<a target='_blank' href='%s'>%s</a>"
-- | very explicit cleaning of the type signature in the hoogle 5 response,
-- to remove html markup and escaped characters.
removeMarkup :: String -> String
removeMarkup s = T.unpack $ List.foldl (flip ($)) (T.pack s) replaceAll
where replacements :: [ (T.Text, T.Text) ]
replacements = [ ( "<span class=name>", "" )
, ( "</span>", "" )
, ( "<0>", "" )
, ( "</0>", "" )
, ( "&gt;", ">" )
, ( "&lt;", "<" )
, ( "<b>", "")
, ( "</b>", "")
]
replaceAll = uncurry T.replace <$> replacements

View File

@ -7,10 +7,12 @@ import Test.Hspec
import IHaskell.Test.Completion (testCompletions) import IHaskell.Test.Completion (testCompletions)
import IHaskell.Test.Parser (testParser) import IHaskell.Test.Parser (testParser)
import IHaskell.Test.Eval (testEval) import IHaskell.Test.Eval (testEval)
import IHaskell.Test.Hoogle (testHoogle)
main :: IO () main :: IO ()
main = main =
hspec $ do hspec $ do
testParser testParser
testEval testEval
testCompletions testCompletions
testHoogle

View File

@ -0,0 +1,63 @@
{-# LANGUAGE QuasiQuotes #-}
module IHaskell.Test.Hoogle ( testHoogle ) where
import Test.Hspec
import Text.RawString.QQ
import IHaskell.Eval.Hoogle
-- import Data.Text (unpack)
-- import qualified Data.Text.IO as T
preludeFmapJson :: String
preludeFmapJson = [r|
[
{
"url": "https://hackage.haskell.org/package/base/docs/Prelude.html#v:fmap",
"module": {
"url": "https://hackage.haskell.org/package/base/docs/Prelude.html",
"name": "Prelude"
},
"package": {
"url": "https://hackage.haskell.org/package/base",
"name": "base"
},
"item": "<span class=name><0>fmap</0></span> :: Functor f =&gt; (a -&gt; b) -&gt; f a -&gt; f b",
"type": "",
"docs": ""
}
]|]
moduleJson :: String
moduleJson = [r|
[
{
"url": "https://hackage.haskell.org/package/universum/docs/Universum-Functor-Fmap.html",
"module": {},
"package": {
"url": "https://hackage.haskell.org/package/universum",
"name": "universum"
},
"item": "<b>module</b> Universum.Functor.<span class=name><0>Fmap</0></span>",
"type": "module",
"docs": "This module contains useful functions to work with <a>Functor</a> type\nclass.\n"
}
]|]
testHoogle :: Spec
testHoogle = describe "Hoogle Search" $ do
describe "fmap search result" $ do
let results = parseResponse preludeFmapJson :: [HoogleResult]
it "should find 1 results" $ do
length results `shouldBe` 1
let (SearchResult (HoogleResponse loc signature _docUrl)) = head results
it "should not contain html markup" $ do
loc `shouldBe` "https://hackage.haskell.org/package/base/docs/Prelude.html#v:fmap"
signature `shouldBe` "fmap :: Functor f => (a -> b) -> f a -> f b"
describe "module result" $ do
let results = parseResponse moduleJson :: [HoogleResult]
let (SearchResult (HoogleResponse _loc signature _docUrl)) = head results
it "should not contain html markup" $ do
signature `shouldBe` "module Universum.Functor.Fmap"
it "should be renderable" $ do
(render Plain $ head results) `shouldStartWith` "module Universum.Functor.Fmap"
(render HTML $ head results) `shouldStartWith` "<span class='hoogle-head'>module</span>"