diff --git a/package.yaml b/package.yaml index f2b8dee7..ac620549 100644 --- a/package.yaml +++ b/package.yaml @@ -25,6 +25,7 @@ default-extensions: - ConstraintKinds - DataKinds - DefaultSignatures + - DeriveAnyClass - DeriveDataTypeable - DeriveGeneric - DerivingStrategies diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 0ef754d0..ea9491a8 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -15,6 +15,8 @@ import Control.Lens (makeLenses) import Data.Aeson (FromJSON (..), withText) import Data.Char (isAlphaNum) import Data.Char qualified as C +import Data.DList (DList) +import Data.DList qualified as DList import Data.Default (Default (..)) import Data.List qualified as L import Data.Map qualified as M @@ -26,8 +28,6 @@ import Text.Numeral.Roman (toRoman) import Xrefcheck.Progress import Xrefcheck.Util -import Data.DList (DList) -import Data.DList qualified as DList ----------------------------------------------------------- -- Types @@ -40,15 +40,10 @@ import Data.DList qualified as DList data Flavor = GitHub | GitLab - deriving stock (Show) + deriving stock (Show, Enum, Bounded) allFlavors :: [Flavor] -allFlavors = [GitHub, GitLab] - where - _exhaustivenessCheck = \case - GitHub -> () - GitLab -> () - -- if you update this, also update the list above +allFlavors = [minBound.. maxBound] instance FromJSON Flavor where parseJSON = withText "flavor" $ \txt -> @@ -62,6 +57,7 @@ instance FromJSON Flavor where -- representation of this thing, and it actually appears in reports only. newtype Position = Position (Maybe Text) deriving stock (Show, Eq, Generic) + deriving anyclass NFData instance Buildable Position where build (Position pos) = case pos of @@ -77,7 +73,9 @@ data Reference = Reference , rAnchor :: Maybe Text -- ^ Section or custom anchor tag. , rPos :: Position - } deriving stock (Show, Generic) + } + deriving stock (Show, Generic) + deriving anyclass NFData -- | Context of anchor. data AnchorType @@ -88,35 +86,51 @@ data AnchorType | BiblioAnchor -- ^ Id of entry in bibliography deriving stock (Show, Eq, Generic) + deriving anyclass NFData -- | A referable anchor. data Anchor = Anchor { aType :: AnchorType , aName :: Text , aPos :: Position - } deriving stock (Show, Eq, Generic) + } + deriving stock (Show, Eq, Generic) + deriving anyclass NFData + +data CopyPaste = CopyPaste + { cpAnchorText :: Text + , cpPlainText :: Text + , cpPosition :: Position + } + deriving stock (Show, Eq, Generic) + deriving anyclass NFData data FileInfoDiff = FileInfoDiff { _fidReferences :: DList Reference , _fidAnchors :: DList Anchor + , _fidCopyPastes :: DList CopyPaste } makeLenses ''FileInfoDiff diffToFileInfo :: FileInfoDiff -> FileInfo -diffToFileInfo (FileInfoDiff refs anchors) = - FileInfo (DList.toList refs) (DList.toList anchors) +diffToFileInfo (FileInfoDiff refs anchors pastas) = + FileInfo (DList.toList refs) (DList.toList anchors) (DList.toList pastas) instance Semigroup FileInfoDiff where - FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d) + FileInfoDiff a b e <> FileInfoDiff c d f = FileInfoDiff (a <> c) (b <> d) (e <> f) instance Monoid FileInfoDiff where - mempty = FileInfoDiff mempty mempty + mempty = FileInfoDiff mempty mempty mempty -- | All information regarding a single file we care about. data FileInfo = FileInfo { _fiReferences :: [Reference] , _fiAnchors :: [Anchor] - } deriving stock (Show, Generic) + , _fiCopyPastes :: [CopyPaste] + } + deriving stock (Show, Generic) + deriving anyclass NFData + makeLenses ''FileInfo instance Default FileInfo where @@ -129,12 +143,6 @@ newtype RepoInfo = RepoInfo (Map FilePath FileInfo) -- Instances ----------------------------------------------------------- -instance NFData Position -instance NFData Reference -instance NFData AnchorType -instance NFData Anchor -instance NFData FileInfo - instance Buildable Reference where build Reference{..} = nameF ("reference " +| paren (build loc) |+ " " +| rPos |+ "") $ diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index 9dbad32f..711c3b24 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -18,11 +18,14 @@ module Xrefcheck.Scanners.Markdown import Universum import CMarkGFM (Node (..), NodeType (..), PosInfo (..), commonmarkToNode) +import Control.Lens hiding ((^?)) import Control.Monad.Except (MonadError, throwError) import Data.Aeson.TH (deriveFromJSON) import Data.ByteString.Lazy qualified as BSL +import Data.Char (isAlpha) import Data.DList qualified as DList import Data.Default (def) +import Data.List (isSubsequenceOf) import Data.Text qualified as T import Data.Text.Lazy qualified as LT import Fmt (Buildable (..), blockListF, nameF, (+|), (|+)) @@ -37,6 +40,8 @@ data MarkdownConfig = MarkdownConfig deriveFromJSON aesonConfigOption ''MarkdownConfig +makePrisms ''NodeType + defGithubMdConfig :: MarkdownConfig defGithubMdConfig = MarkdownConfig { mcFlavor = GitHub @@ -130,7 +135,7 @@ nodeExtractInfo input@(Node _ _ nSubs) = do else case removeIgnored input of Left err -> throwError err Right relevant -> - diffToFileInfo <$> foldNode extractor relevant + diffToFileInfo <$> foldNode (merge [extractor, copyPaste]) relevant where extractor :: Node -> m FileInfoDiff @@ -144,7 +149,9 @@ nodeExtractInfo input@(Node _ _ nSubs) = do let aType = HeaderAnchor lvl let aName = headerToAnchor flavor $ nodeExtractText node let aPos = toPosition pos - return $ FileInfoDiff DList.empty $ DList.singleton $ Anchor {aType, aName, aPos} + return mempty + { _fidAnchors = DList.singleton $ Anchor {aType, aName, aPos} + } HTML_INLINE text -> do let mName = T.stripSuffix "\">" =<< T.stripPrefix " do let aType = HandAnchor aPos = toPosition pos - return $ FileInfoDiff - mempty - (pure $ Anchor {aType, aName, aPos}) + return mempty + { _fidAnchors = DList.singleton $ Anchor {aType, aName, aPos} + } Nothing -> do return mempty @@ -167,12 +174,46 @@ nodeExtractInfo input@(Node _ _ nSubs) = do [t] -> (t, Nothing) t : ts -> (t, Just $ T.intercalate "#" ts) [] -> error "impossible" - return $ FileInfoDiff - (DList.singleton $ Reference {rName, rPos, rLink, rAnchor}) - DList.empty + return mempty + { _fidReferences = DList.singleton $ Reference {rName, rPos, rLink, rAnchor} + } _ -> return mempty + copyPaste :: Node -> m FileInfoDiff + copyPaste (Node _ (LIST _) nodes) = do + case items of + top : rest | urlIsASubsequence top -> do + let bad = filter (not . urlIsASubsequence) rest + pure mempty { _fidCopyPastes = DList.fromList bad } + _ -> do + pure mempty + where + items = do + (_, nodes', _) <- takeOnly _ITEM nodes + (_, nodes'', _) <- takeOnly _PARAGRAPH nodes' + take 1 $ do + (_, texts, (url, _)) <- takeOnly _LINK nodes'' + (pos, _, txt) <- take 1 $ takeOnly _TEXT texts + return (CopyPaste url txt (toPosition pos)) + + copyPaste _ = pure mempty + + takeOnly prizm list = do + Node pos hdr nodes <- list + case hdr^?prizm of + Just res -> return (pos, nodes, res) + Nothing -> [] + +urlIsASubsequence :: CopyPaste -> Bool +urlIsASubsequence paste = + gist (cpAnchorText paste) `isSubsequenceOf` gist (cpPlainText paste) + where + gist = T.unpack . T.toLower . T.filter isAlpha + +merge :: (Monad m, Monoid b) => [a -> m b] -> a -> m b +merge fs a = mconcat <$> traverse ($ a) fs + checkIgnoreFile :: [Node] -> Bool checkIgnoreFile nodes = let isSimpleComment :: Node -> Bool diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index f3be2154..92606550 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -27,6 +27,7 @@ import Universum import Control.Concurrent.Async (wait, withAsync) import Control.Exception (throwIO) import Control.Monad.Except (MonadError (..)) +import Data.Bits (toIntegralSized) import Data.ByteString qualified as BS import Data.Map qualified as M import Data.Text qualified as T @@ -49,7 +50,6 @@ import Text.Regex.TDFA.Text (Regex, regexec) import Text.URI (Authority (..), URI (..), mkURI) import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout) -import Data.Bits (toIntegralSized) import Xrefcheck.Config import Xrefcheck.Core import Xrefcheck.Orphans () @@ -70,9 +70,7 @@ deriving newtype instance Semigroup (VerifyResult e) deriving newtype instance Monoid (VerifyResult e) instance Buildable e => Buildable (VerifyResult e) where - build vr = case verifyErrors vr of - Nothing -> "ok" - Just errs -> listF errs + build vr = maybe "ok" listF (verifyErrors vr) verifyOk :: VerifyResult e -> Bool verifyOk (VerifyResult errors) = null errors @@ -114,6 +112,7 @@ data VerifyError | ExternalFtpException FTPException | FtpEntryDoesNotExist FilePath | ExternalResourceSomeError Text + | PossiblyIncorrectCopyPaste Text Text deriving stock (Show, Eq) instance Buildable VerifyError where @@ -156,10 +155,15 @@ instance Buildable VerifyError where "⛂ FTP exception (" +| err |+ ")\n" FtpEntryDoesNotExist entry -> - "⛂ File or directory does not exist:\n" +| entry |+ "\n" + "⛂ File or directory does not exist:\n" +| entry |+ "\n" ExternalResourceSomeError err -> "⛂ " +| build err |+ "\n\n" + + PossiblyIncorrectCopyPaste url text -> + "⛂ Possibly incorrect copy-paste in list with references\n" +| + " the url is " +| build url |+ "\n " +| + " but the text is " +| build text |+ "\n\n" where anchorHints = \case [] -> "\n" @@ -219,10 +223,19 @@ verifyRepo progressRef <- newIORef $ initVerifyProgress (map snd toScan) + errorss <- for (M.toList repoInfo) $ \(file, info) -> do + let pasta = _fiCopyPastes info + return + $ VerifyResult + $ fmap (\(CopyPaste url txt pos) -> + WithReferenceLoc file (Reference "" "" Nothing pos) + $ PossiblyIncorrectCopyPaste url txt) + pasta + accumulated <- withAsync (printer progressRef) $ \_ -> forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) -> verifyReference config mode progressRef repoInfo' root file ref - return $ fold accumulated + return $ fold errorss <> fold accumulated where printer progressRef = forever $ do readIORef progressRef >>= reprintAnalyseProgress rw mode diff --git a/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs b/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs new file mode 100644 index 00000000..08a0a3ab --- /dev/null +++ b/tests/Test/Xrefcheck/CopyPasteInListsSpec.hs @@ -0,0 +1,24 @@ +{- SPDX-FileCopyrightText: 2019 Serokell + - + - SPDX-License-Identifier: MPL-2.0 + -} + +module Test.Xrefcheck.CopyPasteInListsSpec where + +import Universum + +import Test.Hspec (Spec, describe, it, shouldBe) + +import Test.Xrefcheck.Util +import Xrefcheck.Core + +spec :: Spec +spec = do + describe "Possibly incorrect copy-paste" $ do + for_ allFlavors $ \fl -> do + it ("is detected (" <> show fl <> ")") $ do + fi <- getFI fl "tests/markdowns/without-annotations/copy-paste_in_lists.md" + getPasta fi `shouldBe`[("foo-bar","Foo Kek")] + where + getPasta :: FileInfo -> [(Text, Text)] + getPasta fi = map (cpAnchorText &&& cpPlainText) $ fi ^. fiCopyPastes diff --git a/tests/markdowns/without-annotations/copy-paste_in_lists.md b/tests/markdowns/without-annotations/copy-paste_in_lists.md new file mode 100644 index 00000000..aa88875c --- /dev/null +++ b/tests/markdowns/without-annotations/copy-paste_in_lists.md @@ -0,0 +1,17 @@ + + +A list with bad copy-paste: + +- [Foo Bar](foo-bar) e +- [Foo Qux](foo-qux) e +- [Foo Kek](foo-bar) e + +A list that is completely fine: + +- [Foo Bar](foo-bar) e +- [Foo Qux](foo-qux) e +- [Foo Kek](foo-kek) e