Skip to content

Commit

Permalink
start on table parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jasminecao committed Dec 13, 2021
1 parent 72df0e6 commit e898cca
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 44 deletions.
16 changes: 8 additions & 8 deletions src/HTMLParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module HTMLParser where
import qualified Control.Monad as Monad
import Data.Char (isSpace)
import Data.Functor (($>))
import Syntax (Block (..), Doc (Doc), Line, Text (..), TableType (..))
import Syntax (Block (..), Doc (Doc), Line, TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..))
import qualified Syntax as S
import Text.Parsec.Token
import Text.ParserCombinators.Parsec
Expand Down Expand Up @@ -113,19 +113,19 @@ hBrP = (try (openingTag "br") <|> string "<br/>") $> Br
hLineP :: Parser S.Line
hLineP = S.Line <$> many1 hTextP

hTableP :: Parser Block
hTableP :: Parser Block
hTableP = Table <$> hTableHeadP <*> hTableBodyP

hTableHeadP :: Parser TableType
hTableHeadP = TableHead <$> container "tr" hTableCellP
hTableHeadP :: Parser TableHead
hTableHeadP = TableHead . TableRow <$> container "tr" hTableCellP

hTableBodyP :: Parser TableType
hTableBodyP = TableBody <$> container "tr" hTableCellP
hTableBodyP :: Parser TableBody
hTableBodyP = TableBody <$> container "tbody" hTableRowP

hTableRowP :: Parser TableType
hTableRowP :: Parser TableRow
hTableRowP = TableRow <$> container "tr" hTableCellP

hTableCellP :: Parser TableType
hTableCellP :: Parser TableCell
hTableCellP = TableCell <$> lineContainer "td"

-- parses for a text string
Expand Down
12 changes: 9 additions & 3 deletions src/HTMLPrettyPrinter.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module HTMLPrettyPrinter where

import Syntax (Block (..), Doc (Doc), Line, Text (..), TableType (..))
import Syntax (Block (..), Doc (Doc), Line, TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..))
import qualified Syntax as S
import Text.PrettyPrint hiding (braces, parens, sep, (<>))
import qualified Text.PrettyPrint as PP
Expand Down Expand Up @@ -39,10 +39,16 @@ instance PP S.Text where
pp (S.Link l href) = tagWithAttrs "a" [("href", href)] $ PP.hcat (map pp l)
pp (S.Normal s) = PP.text s

instance PP S.TableType where
pp (TableHead trs) = tag "thead" $ PP.hcat (map pp trs)
instance PP S.TableHead where
pp (TableHead tr) = tag "thead" $ pp tr

instance PP S.TableBody where
pp (TableBody trs) = tag "tbody" $ PP.hcat (map pp trs)

instance PP S.TableRow where
pp (TableRow tds) = tag "tr" $ PP.hcat (map pp tds)

instance PP S.TableCell where
pp (TableCell td) = tag "td" $ pp td

tag :: String -> PP.Doc -> PP.Doc
Expand Down
6 changes: 3 additions & 3 deletions src/HUnitTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import Data.Char (isSpace)
import HTMLParser
import MarkdownParser
import SampleText
import Syntax (Block (..), Doc (Doc), Line (..), Text (..))
import Syntax (Block (..), Doc (Doc), Line (..), TableBody (..), TableCell (..), TableHead (..), TableRow (..), Text (..))
import qualified Syntax as S
import Test.HUnit (Test (TestList), runTestTT, (~:), (~?=))
import Text.Parsec.Token
Expand Down Expand Up @@ -185,7 +185,7 @@ test_brPHrP =
test_tableP =
"table"
~: TestList
[]
[p tableP tableSimple ~?= Right (Table (TableHead (TableRow [TableCell (Line [Normal "Syntax "]), TableCell (Line [Normal "Description "])])) (TableBody [TableRow [TableCell (Line [Normal "Header "]), TableCell (Line [Normal "Title "])], TableRow [TableCell (Line [Normal "Paragraph "]), TableCell (Line [Normal "Text "])]]))]

test_blockP =
"parsing block"
Expand Down Expand Up @@ -309,7 +309,7 @@ test_hHeadingP =
p hHeadingP "<h7>Heading 7</h7>" ~?= Left "No parses",
p hHeadingP "Heading 1\n" ~?= Left "No parses",
p hHeadingP "<h2><h1>Heading 2</h2>" ~?= Right (Heading 2 (S.Line [Normal "<h1>Heading 2"]))
-- p hHeadingP "<h1>HEADING<a href=\"url\">ONE</a></h1>" ~?=
-- p hHeadingP "<h1>HEADING<a href=\"url\">ONE</a></h1>" ~?=
-- Right (Heading 1 (S.Line [Normal "HEADING ", Link ["ONE"] "url"]))
]

Expand Down
39 changes: 36 additions & 3 deletions src/MarkdownParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ module MarkdownParser where

import qualified Control.Monad as Monad
import Data.Char (isSpace)
import Data.Functor
import Syntax (Block (..), Doc (Doc), Line, Text (..))
import Data.Functor (($>))
import Syntax (Block (..), Doc (Doc), Line, TableBody, TableCell, TableHead, TableRow, Text (..))
import qualified Syntax as S
import Text.Parsec.Token
import Text.ParserCombinators.Parsec as Parsec
Expand Down Expand Up @@ -70,6 +70,39 @@ olListP =
lineP
return (startVal, firstItem : remainingItems)

-- TODO: spaces between separators
-- TODO: parse if no header separator
tableP :: Parser Block
tableP = do
firstRow <- rowP <* try theadSeparatorP
remainingRows <- many rowP
return $ Table (S.TableHead firstRow) (S.TableBody remainingRows)
where
-- parses the pipe character and any spaces/tabs following
pipeP :: Parser String
pipeP = string "|" <* many (string " " <|> string "\t")

-- parses for a row of header separators |---|---|---|
theadSeparatorP :: Parser ()
theadSeparatorP =
( pipeP
*> manyTill
( do
hx <- manyTill (char '-') pipeP
Monad.guard (length hx >= 3)
)
newLineChar
)
$> ()

-- parses a row of table cells
rowP :: Parser TableRow
rowP =
pipeP
*> ( S.TableRow
<$> manyTill (S.TableCell . S.Line <$> manyTill (try textP) (try pipeP)) newLineChar
)

-- parses for a link ([text](link))
linkP :: Parser Text
linkP = S.Link <$> bracketsP (manyTill textP (string "]")) <*> parensP (many (noneOf ")"))
Expand Down Expand Up @@ -152,7 +185,7 @@ normalP =
-- parses for a string until a reserved character is found
-- TODO: add this to syntax?
stringP :: Parser String
stringP = many1 $ noneOf ['*', '~', '`', '>', '_', '[', ']', '\n']
stringP = many1 $ noneOf ['*', '~', '`', '>', '_', '[', ']', '|', '\n']

-- removes trailing whitespace
wsP :: Parser a -> Parser a
Expand Down
28 changes: 14 additions & 14 deletions src/QCTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import qualified Control.Monad as Monad
import Data.Char (isSpace)
import MarkdownParser
import MarkdownPrettyPrinter
import Syntax (Block (..), Doc (Doc), Line, TableType (..), Text (..))
import Syntax (Block (..), Doc (Doc), Line, Text (..))
import qualified Syntax as S
import Test.HUnit
import Test.QuickCheck (Arbitrary, Gen, arbitrary, choose, oneof, shrink)
Expand Down Expand Up @@ -65,19 +65,19 @@ instance Arbitrary S.Line where
shrink (S.Line (x : xs)) = [S.Line xs]
shrink _ = []

instance Arbitrary TableType where
arbitrary =
oneof
[ TableHead <$> arbitrary,
TableBody <$> arbitrary,
TableRow <$> arbitrary,
TableCell . S.Line <$> arbitrary
]
-- instance Arbitrary TableType where
-- arbitrary =
-- oneof
-- [ TableHead <$> arbitrary,
-- TableBody <$> arbitrary,
-- TableRow <$> arbitrary,
-- TableCell . S.Line <$> arbitrary
-- ]

shrink (TableHead b) = b
shrink (TableBody r) = r
shrink (TableRow c) = c
shrink (TableCell c) = []
-- shrink (TableHead b) = b
-- shrink (TableBody r) = r
-- shrink (TableRow c) = c
-- shrink (TableCell c) = []

instance Arbitrary Block where
arbitrary =
Expand Down Expand Up @@ -111,7 +111,7 @@ instance Arbitrary Block where
shrink (CodeBlock ln) = CodeBlock <$> shrink ln
shrink Hr = [Hr]
shrink Br = [Br]
shrink (Table t) = undefined
shrink (Table thead tbody) = undefined

prop_roundtrip_text :: Text -> Bool
prop_roundtrip_text t = parse textP "" (markdownPretty t) == Right t
Expand Down
8 changes: 1 addition & 7 deletions src/SampleText.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,7 @@ blank.) Normal paragraphs should not be indented with spaces or tabs.
|]

tableSimple :: String
tableSimple =
[r|
| Syntax | Description |
| ----------- | ----------- |
| Header | Title |
| Paragraph | Text |
|]
tableSimple = "| Syntax | Description |\n|-----------|-----------|\n| Header | Title |\n| Paragraph | Text |\n"

-- headers and paragraphs only
blockTextSimple :: String
Expand Down
17 changes: 11 additions & 6 deletions src/Syntax.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,19 @@ data Block
| CodeBlock String
| Hr -- empty
| Br -- empty
| Table TableType TableType -- thead, tbody
| Table TableHead TableBody -- thead, tbody
deriving (Eq, Show)

data TableType
= TableHead [TableType]
| TableBody [TableType]
| TableRow [TableType]
| TableCell Line
newtype TableHead = TableHead TableRow
deriving (Eq, Show)

newtype TableBody = TableBody [TableRow]
deriving (Eq, Show)

newtype TableRow = TableRow [TableCell]
deriving (Eq, Show)

newtype TableCell = TableCell Line
deriving (Eq, Show)

-- | Line of text, may include multiple text elements.
Expand Down

0 comments on commit e898cca

Please sign in to comment.