From 94748c51d4d9b80318f18a0c45843d7b0f54de1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BD=D0=B4=D1=80=D0=B5=D0=B5=D0=B2=20=D0=9A=D0=B8?= =?UTF-8?q?=D1=80=D0=B8=D0=BB=D0=BB?= Date: Tue, 9 Nov 2021 19:49:27 +0400 Subject: [PATCH] Add copy-paste scanner and tests --- src/Xrefcheck/Core.hs | 1 + src/Xrefcheck/Scanners/Markdown.hs | 30 +++++++++++++++++++ src/Xrefcheck/Verify.hs | 25 ++++++++++++---- tests/Test/Xrefcheck/CopyPasteInListsSpec.hs | 24 +++++++++++++++ .../copy-paste_in_lists.md | 17 +++++++++++ 5 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 tests/Test/Xrefcheck/CopyPasteInListsSpec.hs create mode 100644 tests/markdowns/without-annotations/copy-paste_in_lists.md diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 80ffd57a..ea9491a8 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -130,6 +130,7 @@ data FileInfo = FileInfo } deriving stock (Show, Generic) deriving anyclass NFData + makeLenses ''FileInfo instance Default FileInfo where diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index 92942a06..0db2cf3d 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -18,11 +18,13 @@ module Xrefcheck.Scanners.Markdown import Universum import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode) +import Control.Lens hiding ((^?)) import Control.Monad.Except (MonadError, throwError) import Data.Aeson.TH (deriveFromJSON) import Data.ByteString.Lazy qualified as BSL import Data.DList qualified as DList import Data.Default (def) +import Data.List (isSubsequenceOf) import Data.Text qualified as T import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) @@ -37,6 +39,8 @@ data MarkdownConfig = MarkdownConfig deriveFromJSON aesonConfigOption ''MarkdownConfig +makePrisms ''NodeType + defGithubMdConfig :: MarkdownConfig defGithubMdConfig = MarkdownConfig { mcFlavor = GitHub @@ -176,8 +180,34 @@ nodeExtractInfo input@(Node _ _ nSubs) = do _ -> return mempty copyPaste :: Node -> m FileInfoDiff + copyPaste (Node _ (LIST _) nodes) = do + case items of + top : rest | urlIsASubsequence top -> do + let bad = filter (not . urlIsASubsequence) rest + pure mempty { _fidCopyPastes = DList.fromList bad } + _ -> do + pure mempty + where + items = do + (_, nodes', _) <- takeOnly _ITEM nodes + (_, nodes'', _) <- takeOnly _PARAGRAPH nodes' + take 1 $ do + (_, texts, (url, _)) <- takeOnly _LINK nodes'' + (pos, _, txt) <- take 1 $ takeOnly _TEXT texts + return (CopyPaste url txt (toPosition pos)) + copyPaste _ = pure mempty + takeOnly prizm list = do + Node pos hdr nodes <- list + case hdr^?prizm of + Just res -> return (pos, nodes, res) + Nothing -> [] + +urlIsASubsequence :: CopyPaste -> Bool +urlIsASubsequence paste = + T.unpack (cpAnchorText paste) `isSubsequenceOf` T.unpack (cpPlainText paste) + merge :: (Monad m, Monoid b) => [a -> m b] -> a -> m b merge fs a = mconcat <$> traverse ($ a) fs diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index f3be2154..92606550 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -27,6 +27,7 @@ import Universum import Control.Concurrent.Async (wait, withAsync) import Control.Exception (throwIO) import Control.Monad.Except (MonadError (..)) +import Data.Bits (toIntegralSized) import Data.ByteString qualified as BS import Data.Map qualified as M import Data.Text qualified as T @@ -49,7 +50,6 @@ import Text.Regex.TDFA.Text (Regex, regexec) import Text.URI (Authority (..), URI (..), mkURI) import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout) -import Data.Bits (toIntegralSized) import Xrefcheck.Config import Xrefcheck.Core import Xrefcheck.Orphans () @@ -70,9 +70,7 @@ deriving newtype instance Semigroup (VerifyResult e) deriving newtype instance Monoid (VerifyResult e) instance Buildable e => Buildable (VerifyResult e) where - build vr = case verifyErrors vr of - Nothing -> "ok" - Just errs -> listF errs + build vr = maybe "ok" listF (verifyErrors vr) verifyOk :: VerifyResult e -> Bool verifyOk (VerifyResult errors) = null errors @@ -114,6 +112,7 @@ data VerifyError | ExternalFtpException FTPException | FtpEntryDoesNotExist FilePath | ExternalResourceSomeError Text + | PossiblyIncorrectCopyPaste Text Text deriving stock (Show, Eq) instance Buildable VerifyError where @@ -156,10 +155,15 @@ instance Buildable VerifyError where "⛂ FTP exception (" +| err |+ ")\n" FtpEntryDoesNotExist entry -> - "⛂ File or directory does not exist:\n" +| entry |+ "\n" + "⛂ File or directory does not exist:\n" +| entry |+ "\n" ExternalResourceSomeError err -> "⛂ " +| build err |+ "\n\n" + + PossiblyIncorrectCopyPaste url text -> + "⛂ Possibly incorrect copy-paste in list with references\n" +| + " the url is " +| build url |+ "\n " +| + " but the text is " +| build text |+ "\n\n" where anchorHints = \case [] -> "\n" @@ -219,10 +223,19 @@ verifyRepo progressRef <- newIORef $ initVerifyProgress (map snd toScan) + errorss <- for (M.toList repoInfo) $ \(file, info) -> do + let pasta = _fiCopyPastes info + return + $ VerifyResult + $ fmap (\(CopyPaste url txt pos) -> + WithReferenceLoc file (Reference "" "" Nothing pos) + $ PossiblyIncorrectCopyPaste url txt) + pasta + accumulated <- withAsync (printer progressRef) $ \_ -> forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) -> verifyReference config mode progressRef repoInfo' root file ref - return $ fold accumulated + return $ fold errorss <> fold accumulated where printer progressRef = forever $ do readIORef progressRef >>= reprintAnalyseProgress rw mode diff --git a/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs b/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs new file mode 100644 index 00000000..dac6b9d4 --- /dev/null +++ b/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs @@ -0,0 +1,24 @@ +{- SPDX-FileCopyrightText: 2019 Serokell + - + - SPDX-License-Identifier: MPL-2.0 + -} + +module Test.Xrefcheck.CopyPasteInListsSpec where + +import Universum + +import Test.Hspec (Spec, describe, it, shouldBe) + +import Test.Xrefcheck.Util +import Xrefcheck.Core + +spec :: Spec +spec = do + describe "Possibly incorrect copy-paste" $ do + for_ allFlavors $ \fl -> do + it ("is detected (" <> show fl <> ")") $ do + fi <- getFI fl "tests/markdowns/without-annotations/copy-paste_in_lists.md" + getPasta fi `shouldBe`[("a", "c")] + where + getPasta :: FileInfo -> [(Text, Text)] + getPasta fi = map (cpAnchorText &&& cpPlainText) $ fi ^. fiCopyPastes diff --git a/tests/markdowns/without-annotations/copy-paste_in_lists.md b/tests/markdowns/without-annotations/copy-paste_in_lists.md new file mode 100644 index 00000000..44b42b17 --- /dev/null +++ b/tests/markdowns/without-annotations/copy-paste_in_lists.md @@ -0,0 +1,17 @@ + + +A list with bad copy-paste: + +- [a](a) e +- [b](b) e +- [c](a) e + +A list that is completely fine: + +- [a](a) d +- [b](b) d +- [c](c) d