Skip to content

Commit

Permalink
Add copy-paste scanner and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Heimdell committed Nov 9, 2021
1 parent f60aece commit 94748c5
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/Xrefcheck/Core.hs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ data FileInfo = FileInfo
}
deriving stock (Show, Generic)
deriving anyclass NFData

makeLenses ''FileInfo

instance Default FileInfo where
Expand Down
30 changes: 30 additions & 0 deletions src/Xrefcheck/Scanners/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ module Xrefcheck.Scanners.Markdown
import Universum

import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode)
import Control.Lens hiding ((^?))
import Control.Monad.Except (MonadError, throwError)
import Data.Aeson.TH (deriveFromJSON)
import Data.ByteString.Lazy qualified as BSL
import Data.DList qualified as DList
import Data.Default (def)
import Data.List (isSubsequenceOf)
import Data.Text qualified as T
import Data.Text.Lazy qualified as LT
import Fmt (Buildable (..), blockListF, nameF, (+|), (|+))
Expand All @@ -37,6 +39,8 @@ data MarkdownConfig = MarkdownConfig

deriveFromJSON aesonConfigOption ''MarkdownConfig

makePrisms ''NodeType

defGithubMdConfig :: MarkdownConfig
defGithubMdConfig = MarkdownConfig
{ mcFlavor = GitHub
Expand Down Expand Up @@ -176,8 +180,34 @@ nodeExtractInfo input@(Node _ _ nSubs) = do
_ -> return mempty

copyPaste :: Node -> m FileInfoDiff
copyPaste (Node _ (LIST _) nodes) = do
case items of
top : rest | urlIsASubsequence top -> do
let bad = filter (not . urlIsASubsequence) rest
pure mempty { _fidCopyPastes = DList.fromList bad }
_ -> do
pure mempty
where
items = do
(_, nodes', _) <- takeOnly _ITEM nodes
(_, nodes'', _) <- takeOnly _PARAGRAPH nodes'
take 1 $ do
(_, texts, (url, _)) <- takeOnly _LINK nodes''
(pos, _, txt) <- take 1 $ takeOnly _TEXT texts
return (CopyPaste url txt (toPosition pos))

copyPaste _ = pure mempty

takeOnly prizm list = do
Node pos hdr nodes <- list
case hdr^?prizm of
Just res -> return (pos, nodes, res)
Nothing -> []

urlIsASubsequence :: CopyPaste -> Bool
urlIsASubsequence paste =
T.unpack (cpAnchorText paste) `isSubsequenceOf` T.unpack (cpPlainText paste)

merge :: (Monad m, Monoid b) => [a -> m b] -> a -> m b
merge fs a = mconcat <$> traverse ($ a) fs

Expand Down
25 changes: 19 additions & 6 deletions src/Xrefcheck/Verify.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import Universum
import Control.Concurrent.Async (wait, withAsync)
import Control.Exception (throwIO)
import Control.Monad.Except (MonadError (..))
import Data.Bits (toIntegralSized)
import Data.ByteString qualified as BS
import Data.Map qualified as M
import Data.Text qualified as T
Expand All @@ -49,7 +50,6 @@ import Text.Regex.TDFA.Text (Regex, regexec)
import Text.URI (Authority (..), URI (..), mkURI)
import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout)

import Data.Bits (toIntegralSized)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Orphans ()
Expand All @@ -70,9 +70,7 @@ deriving newtype instance Semigroup (VerifyResult e)
deriving newtype instance Monoid (VerifyResult e)

instance Buildable e => Buildable (VerifyResult e) where
build vr = case verifyErrors vr of
Nothing -> "ok"
Just errs -> listF errs
build vr = maybe "ok" listF (verifyErrors vr)

verifyOk :: VerifyResult e -> Bool
verifyOk (VerifyResult errors) = null errors
Expand Down Expand Up @@ -114,6 +112,7 @@ data VerifyError
| ExternalFtpException FTPException
| FtpEntryDoesNotExist FilePath
| ExternalResourceSomeError Text
| PossiblyIncorrectCopyPaste Text Text
deriving stock (Show, Eq)

instance Buildable VerifyError where
Expand Down Expand Up @@ -156,10 +155,15 @@ instance Buildable VerifyError where
"⛂ FTP exception (" +| err |+ ")\n"

FtpEntryDoesNotExist entry ->
"⛂ File or directory does not exist:\n" +| entry |+ "\n"
" File or directory does not exist:\n" +| entry |+ "\n"

ExternalResourceSomeError err ->
"" +| build err |+ "\n\n"

PossiblyIncorrectCopyPaste url text ->
"⛂ Possibly incorrect copy-paste in list with references\n" +|
" the url is " +| build url |+ "\n " +|
" but the text is " +| build text |+ "\n\n"
where
anchorHints = \case
[] -> "\n"
Expand Down Expand Up @@ -219,10 +223,19 @@ verifyRepo

progressRef <- newIORef $ initVerifyProgress (map snd toScan)

errorss <- for (M.toList repoInfo) $ \(file, info) -> do
let pasta = _fiCopyPastes info
return
$ VerifyResult
$ fmap (\(CopyPaste url txt pos) ->
WithReferenceLoc file (Reference "" "" Nothing pos)
$ PossiblyIncorrectCopyPaste url txt)
pasta

accumulated <- withAsync (printer progressRef) $ \_ ->
forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) ->
verifyReference config mode progressRef repoInfo' root file ref
return $ fold accumulated
return $ fold errorss <> fold accumulated
where
printer progressRef = forever $ do
readIORef progressRef >>= reprintAnalyseProgress rw mode
Expand Down
24 changes: 24 additions & 0 deletions tests/Test/Xrefcheck/CopyPasteInListsSpec.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{- SPDX-FileCopyrightText: 2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-}

module Test.Xrefcheck.CopyPasteInListsSpec where

import Universum

import Test.Hspec (Spec, describe, it, shouldBe)

import Test.Xrefcheck.Util
import Xrefcheck.Core

spec :: Spec
spec = do
describe "Possibly incorrect copy-paste" $ do
for_ allFlavors $ \fl -> do
it ("is detected (" <> show fl <> ")") $ do
fi <- getFI fl "tests/markdowns/without-annotations/copy-paste_in_lists.md"
getPasta fi `shouldBe`[("a", "c")]
where
getPasta :: FileInfo -> [(Text, Text)]
getPasta fi = map (cpAnchorText &&& cpPlainText) $ fi ^. fiCopyPastes
17 changes: 17 additions & 0 deletions tests/markdowns/without-annotations/copy-paste_in_lists.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!--
- SPDX-FileCopyrightText: 2018-2019 Serokell <https://serokell.io>
-
- SPDX-License-Identifier: MPL-2.0
-->

A list with bad copy-paste:

- [a](a) e
- [b](b) e
- [c](a) e

A list that is completely fine:

- [a](a) d
- [b](b) d
- [c](c) d

0 comments on commit 94748c5

Please sign in to comment.