From e898cca3e5347033200e908b3217f1cb03e6b7ae Mon Sep 17 00:00:00 2001 From: Jasmine Cao Date: Mon, 13 Dec 2021 11:54:24 -0500 Subject: [PATCH] start on table parsing --- src/HTMLParser.hs | 16 ++++++++-------- src/HTMLPrettyPrinter.hs | 12 +++++++++--- src/HUnitTests.hs | 6 +++--- src/MarkdownParser.hs | 39 ++++++++++++++++++++++++++++++++++++--- src/QCTests.hs | 28 ++++++++++++++-------------- src/SampleText.hs | 8 +------- src/Syntax.hs | 17 +++++++++++------ 7 files changed, 82 insertions(+), 44 deletions(-) diff --git a/src/HTMLParser.hs b/src/HTMLParser.hs index c4e5ed6..8572bea 100644 --- a/src/HTMLParser.hs +++ b/src/HTMLParser.hs @@ -3,7 +3,7 @@ module HTMLParser where import qualified Control.Monad as Monad import Data.Char (isSpace) import Data.Functor (($>)) -import Syntax (Block (..), Doc (Doc), Line, Text (..), TableType (..)) +import Syntax (Block (..), Doc (Doc), Line, TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..)) import qualified Syntax as S import Text.Parsec.Token import Text.ParserCombinators.Parsec @@ -113,19 +113,19 @@ hBrP = (try (openingTag "br") <|> string "
") $> Br hLineP :: Parser S.Line hLineP = S.Line <$> many1 hTextP -hTableP :: Parser Block +hTableP :: Parser Block hTableP = Table <$> hTableHeadP <*> hTableBodyP -hTableHeadP :: Parser TableType -hTableHeadP = TableHead <$> container "tr" hTableCellP +hTableHeadP :: Parser TableHead +hTableHeadP = TableHead . TableRow <$> container "tr" hTableCellP -hTableBodyP :: Parser TableType -hTableBodyP = TableBody <$> container "tr" hTableCellP +hTableBodyP :: Parser TableBody +hTableBodyP = TableBody <$> container "tbody" hTableRowP -hTableRowP :: Parser TableType +hTableRowP :: Parser TableRow hTableRowP = TableRow <$> container "tr" hTableCellP -hTableCellP :: Parser TableType +hTableCellP :: Parser TableCell hTableCellP = TableCell <$> lineContainer "td" -- parses for a text string diff --git a/src/HTMLPrettyPrinter.hs b/src/HTMLPrettyPrinter.hs index 917568b..8af4393 100644 --- a/src/HTMLPrettyPrinter.hs +++ b/src/HTMLPrettyPrinter.hs @@ -1,6 +1,6 @@ module HTMLPrettyPrinter where -import Syntax (Block (..), Doc (Doc), Line, Text (..), TableType (..)) +import Syntax (Block (..), Doc (Doc), Line, TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..)) import qualified Syntax as S import Text.PrettyPrint hiding (braces, parens, sep, (<>)) import qualified Text.PrettyPrint as PP @@ -39,10 +39,16 @@ instance PP S.Text where pp (S.Link l href) = tagWithAttrs "a" [("href", href)] $ PP.hcat (map pp l) pp (S.Normal s) = PP.text s -instance PP S.TableType where - pp (TableHead trs) = tag "thead" $ PP.hcat (map pp trs) +instance PP S.TableHead where + pp (TableHead tr) = tag "thead" $ pp tr + +instance PP S.TableBody where pp (TableBody trs) = tag "tbody" $ PP.hcat (map pp trs) + +instance PP S.TableRow where pp (TableRow tds) = tag "tr" $ PP.hcat (map pp tds) + +instance PP S.TableCell where pp (TableCell td) = tag "td" $ pp td tag :: String -> PP.Doc -> PP.Doc diff --git a/src/HUnitTests.hs b/src/HUnitTests.hs index 31cf466..7455d26 100644 --- a/src/HUnitTests.hs +++ b/src/HUnitTests.hs @@ -4,7 +4,7 @@ import Data.Char (isSpace) import HTMLParser import MarkdownParser import SampleText -import Syntax (Block (..), Doc (Doc), Line (..), Text (..)) +import Syntax (Block (..), Doc (Doc), Line (..), TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..)) import qualified Syntax as S import Test.HUnit (Test (TestList), runTestTT, (~:), (~?=)) import Text.Parsec.Token @@ -185,7 +185,7 @@ test_brPHrP = test_tableP = "table" ~: TestList - [] + [p tableP tableSimple ~?= Right (Table (TableHead (TableRow [TableCell (Line [Normal "Syntax "]), TableCell (Line [Normal "Description "])])) (TableBody [TableRow [TableCell (Line [Normal "Header "]), TableCell (Line [Normal "Title "])], TableRow [TableCell (Line [Normal "Paragraph "]), TableCell (Line [Normal "Text "])]]))] test_blockP = "parsing block" @@ -309,7 +309,7 @@ test_hHeadingP = p hHeadingP "Heading 7" ~?= Left "No parses", p hHeadingP "Heading 1\n" ~?= Left "No parses", p hHeadingP "

Heading 2

" ~?= Right (Heading 2 (S.Line [Normal "

Heading 2"])) - -- p hHeadingP "

HEADINGONE

" ~?= + -- p hHeadingP "

HEADINGONE

" ~?= -- Right (Heading 1 (S.Line [Normal "HEADING ", Link ["ONE"] "url"])) ] diff --git a/src/MarkdownParser.hs b/src/MarkdownParser.hs index edd449b..e2ac7da 100644 --- a/src/MarkdownParser.hs +++ b/src/MarkdownParser.hs @@ -2,8 +2,8 @@ module MarkdownParser where import qualified Control.Monad as Monad import Data.Char (isSpace) -import Data.Functor -import Syntax (Block (..), Doc (Doc), Line, Text (..)) +import Data.Functor (($>)) +import Syntax (Block (..), Doc (Doc), Line, TableBody, TableCell, TableHead, TableRow, Text (..)) import qualified Syntax as S import Text.Parsec.Token import Text.ParserCombinators.Parsec as Parsec @@ -70,6 +70,39 @@ olListP = lineP return (startVal, firstItem : remainingItems) +-- TODO: spaces between separators +-- TODO: parse if no header separator +tableP :: Parser Block +tableP = do + firstRow <- rowP <* try theadSeparatorP + remainingRows <- many rowP + return $ Table (S.TableHead firstRow) (S.TableBody remainingRows) + where + -- parses the pipe character and any spaces/tabs following + pipeP :: Parser String + pipeP = string "|" <* many (string " " <|> string "\t") + + -- parses for a row of header separators |---|---|---| + theadSeparatorP :: Parser () + theadSeparatorP = + ( pipeP + *> manyTill + ( do + hx <- manyTill (char '-') pipeP + Monad.guard (length hx >= 3) + ) + newLineChar + ) + $> () + + -- parses a row of table cells + rowP :: Parser TableRow + rowP = + pipeP + *> ( S.TableRow + <$> manyTill (S.TableCell . S.Line <$> manyTill (try textP) (try pipeP)) newLineChar + ) + -- parses for a link ([text](link)) linkP :: Parser Text linkP = S.Link <$> bracketsP (manyTill textP (string "]")) <*> parensP (many (noneOf ")")) @@ -152,7 +185,7 @@ normalP = -- parses for a string until a reserved character is found -- TODO: add this to syntax? stringP :: Parser String -stringP = many1 $ noneOf ['*', '~', '`', '>', '_', '[', ']', '\n'] +stringP = many1 $ noneOf ['*', '~', '`', '>', '_', '[', ']', '|', '\n'] -- removes trailing whitespace wsP :: Parser a -> Parser a diff --git a/src/QCTests.hs b/src/QCTests.hs index ab7d74e..f0f3fe3 100644 --- a/src/QCTests.hs +++ b/src/QCTests.hs @@ -4,7 +4,7 @@ import qualified Control.Monad as Monad import Data.Char (isSpace) import MarkdownParser import MarkdownPrettyPrinter -import Syntax (Block (..), Doc (Doc), Line, TableType (..), Text (..)) +import Syntax (Block (..), Doc (Doc), Line, Text (..)) import qualified Syntax as S import Test.HUnit import Test.QuickCheck (Arbitrary, Gen, arbitrary, choose, oneof, shrink) @@ -65,19 +65,19 @@ instance Arbitrary S.Line where shrink (S.Line (x : xs)) = [S.Line xs] shrink _ = [] -instance Arbitrary TableType where - arbitrary = - oneof - [ TableHead <$> arbitrary, - TableBody <$> arbitrary, - TableRow <$> arbitrary, - TableCell . S.Line <$> arbitrary - ] +-- instance Arbitrary TableType where +-- arbitrary = +-- oneof +-- [ TableHead <$> arbitrary, +-- TableBody <$> arbitrary, +-- TableRow <$> arbitrary, +-- TableCell . S.Line <$> arbitrary +-- ] - shrink (TableHead b) = b - shrink (TableBody r) = r - shrink (TableRow c) = c - shrink (TableCell c) = [] +-- shrink (TableHead b) = b +-- shrink (TableBody r) = r +-- shrink (TableRow c) = c +-- shrink (TableCell c) = [] instance Arbitrary Block where arbitrary = @@ -111,7 +111,7 @@ instance Arbitrary Block where shrink (CodeBlock ln) = CodeBlock <$> shrink ln shrink Hr = [Hr] shrink Br = [Br] - shrink (Table t) = undefined + shrink (Table thead tbody) = undefined prop_roundtrip_text :: Text -> Bool prop_roundtrip_text t = parse textP "" (markdownPretty t) == Right t diff --git a/src/SampleText.hs b/src/SampleText.hs index 8e376a5..6e3ecff 100644 --- a/src/SampleText.hs +++ b/src/SampleText.hs @@ -33,13 +33,7 @@ blank.) Normal paragraphs should not be indented with spaces or tabs. |] tableSimple :: String -tableSimple = - [r| -| Syntax | Description | -| ----------- | ----------- | -| Header | Title | -| Paragraph | Text | -|] +tableSimple = "| Syntax | Description |\n|-----------|-----------|\n| Header | Title |\n| Paragraph | Text |\n" -- headers and paragraphs only blockTextSimple :: String diff --git a/src/Syntax.hs b/src/Syntax.hs index 088a726..297ff37 100644 --- a/src/Syntax.hs +++ b/src/Syntax.hs @@ -17,14 +17,19 @@ data Block | CodeBlock String | Hr -- empty | Br -- empty - | Table TableType TableType -- thead, tbody + | Table TableHead TableBody -- thead, tbody deriving (Eq, Show) -data TableType - = TableHead [TableType] - | TableBody [TableType] - | TableRow [TableType] - | TableCell Line +newtype TableHead = TableHead TableRow + deriving (Eq, Show) + +newtype TableBody = TableBody [TableRow] + deriving (Eq, Show) + +newtype TableRow = TableRow [TableCell] + deriving (Eq, Show) + +newtype TableCell = TableCell Line deriving (Eq, Show) -- | Line of text, may include multiple text elements.