半角をspanタグで囲む

haskell-jp · Apr 13, 2017 · c6dc1ae · c6dc1ae
1 parent 8251955
commit c6dc1ae
Show file tree

Hide file tree

Showing 4 changed files with 230 additions and 5 deletions.
diff --git a/Makefile b/Makefile
@@ -83,6 +83,9 @@ site: $(SITE_PROG_PATH)
 	@# good idea.
 	stack exec -- site rebuild
 
+test:
+	stack test
+
 # Run a test webserver on http://0.0.0.0:8000 serving up the content of our
 # blog.  If the content changes, it is automatically rebuilt.
 watch: $(SITE_PROG_PATH)

diff --git a/haskell-jp-blog.cabal b/haskell-jp-blog.cabal
@@ -12,5 +12,16 @@ executable site
                   , containers >= 0.5
                   , data-default >= 0.5
                   , hakyll
+                  , pandoc-types
   ghc-options:      -threaded -Wall
   default-language: Haskell2010
+
+test-suite site-doctest
+  type:                exitcode-stdio-1.0
+  main-is:             DocTest.hs
+  hs-source-dirs:      test
+  build-depends:       base
+                     , doctest
+                     , Glob
+  default-language:    Haskell2010
+  ghc-options:         -Wall -threaded -rtsopts -with-rtsopts=-N
diff --git a/src/site.hs b/src/site.hs
@@ -1,16 +1,26 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedLists #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 
+import Data.Char (isLatin1)
+import Data.Data (Data)
 import Data.Default (def)
+import Data.List.NonEmpty (NonEmpty((:|)), groupBy, toList)
 import Data.Maybe (fromMaybe)
+import Data.Typeable (Typeable)
 import Hakyll
        (Compiler, Configuration(..), Context, Identifier, Item, Routes,
         applyAsTemplate, compile, composeRoutes, compressCssCompiler,
-        copyFileCompiler, dateField, defaultContext, field,
+        copyFileCompiler, dateField, defaultContext,
+        defaultHakyllReaderOptions, defaultHakyllWriterOptions, field,
         getMetadataField, getResourceBody, gsubRoute, hakyllWith, idRoute,
         itemIdentifier, listField, loadAll, loadAndApplyTemplate,
-        lookupString, match, metadataRoute, pandocCompiler, recentFirst,
-        relativizeUrls, route, setExtension, templateCompiler)
-
+        lookupString, match, metadataRoute, pandocCompilerWithTransform,
+        recentFirst, relativizeUrls, route, setExtension, templateCompiler)
+import Text.Pandoc.Definition (Inline(Space, Span, Str), Pandoc)
+import Text.Pandoc.Generic (bottomUp)
 
 -- | Change some of the default configuration variables.  This makes our
 -- project working directory a little cleaner.
@@ -74,7 +84,11 @@ main = hakyllWith hakyllConfig $ do
             let subHeadingCtx =
                     field "subHeadingContent" createSubHeadingContentForPost `mappend`
                     postCtx
-            pandocOut <- pandocCompiler
+            pandocOut <-
+                pandocCompilerWithTransform
+                  defaultHakyllReaderOptions
+                  defaultHakyllWriterOptions
+                  addSpaceAroundAsciiPandoc
             postTemplateOut <- loadAndApplyTemplate postTemplate subHeadingCtx pandocOut
             applyDefaultTemplate subHeadingCtx postTemplateOut
 
@@ -124,3 +138,160 @@ postsAndDraftsRoutes = metadataRoute $ \metadata ->
             gsubRoute "posts/" (const "drafts/") `composeRoutes`
             setExtension "html"
         Nothing -> setExtension "html"
+
+-- | All blocks of Latin text will be placed in @\<span\>@ tags with an
+-- @\"ascii\"@ class.
+addSpaceAroundAsciiPandoc :: Pandoc -> Pandoc
+addSpaceAroundAsciiPandoc = bottomUp (collapseAsciiSpan . addSpaceAroundAsciiInlines)
+
+collapseAsciiSpan :: [Inline] -> [Inline]
+collapseAsciiSpan (AsciiSpan innerInlinesA : Space : AsciiSpan innerInlinesB : other) =
+  let newInnerSpan = innerInlinesA ++ [Space] ++ innerInlinesB
+  in collapseAsciiSpan $ Span ("", ["ascii"], []) newInnerSpan : other
+collapseAsciiSpan (inline : inlines) = inline : collapseAsciiSpan inlines
+collapseAsciiSpan [] = []
+
+pattern AsciiSpan :: [Inline] -> Inline
+pattern AsciiSpan innerInlines <- Span ("", ["ascii"], []) innerInlines
+
+addSpaceAroundAsciiInlines :: [Inline] -> [Inline]
+addSpaceAroundAsciiInlines = concatMap addSpaceInline
+
+-- | Convert the 'String' in a 'Str' to a list of 'TextLang' using
+-- 'splitOnLanguage'.  Turn those 'TextLang' into 'Inline' using
+-- 'textLangToInline'.  Blocks of Latin text will be placed in @\<span\>@ tags
+-- with an @\"ascii\"@ class.
+addSpaceInline :: Inline -> [Inline]
+addSpaceInline (Str string) = textLangToInline <$> splitOnLanguage string
+addSpaceInline inline = [inline]
+
+-- | This is a tag around a 'String' representing whether the 'String' is Latin
+-- ('English') or 'Japanese'.  'String's that are neither Latin or 'Japanese'
+-- are treated as 'Japanese'.  See 'charLangToTextLang'.
+data TextLang
+  = Japanese String
+  | English String
+  deriving (Data, Eq, Read, Show, Typeable)
+
+-- | This is a tag around a 'Char' representing whether the 'Char' is Latin or
+-- Japanese.  Characters that are neither Latin or Japanese are treated as
+-- Japanese.  See 'charToCharLang'.
+data CharLang
+  = JapaneseChar Char
+  | EnglishChar Char
+  deriving (Data, Eq, Read, Show, Typeable)
+
+-- | Convert a 'TextLang' to an 'Inline'.  'Japanese' will be converted to a
+-- simple 'Str', while 'English' will be converted to a 'Span' with a
+-- @\"ascii\"@ attribute.
+--
+-- Japanese:
+--
+-- >>> textLangToInline $ Japanese "日本語"
+-- Str "\26085\26412\35486"
+--
+-- English:
+--
+-- >>> textLangToInline $ English "foobar"
+-- Span ("",["ascii"],[]) [Str "foobar"]
+textLangToInline :: TextLang -> Inline
+textLangToInline (Japanese string) = Str string
+textLangToInline (English string) = Span ("", ["ascii"], []) [Str string]
+
+-- | Split a 'String' into groups of 'TextLang'.
+--
+--
+-- Mix of Japanese and English:
+--
+-- >>> splitOnLanguage "今日Haskellを紹介"
+-- [Japanese "\20170\26085",English "Haskell",Japanese "\12434\32057\20171"]
+--
+-- Just English:
+--
+-- >>> splitOnLanguage "This is only English"
+-- [English "This is only English"]
+--
+-- Just Japanese:
+--
+-- >>> splitOnLanguage "日本語"
+-- [Japanese "\26085\26412\35486"]
+splitOnLanguage :: String -> [TextLang]
+splitOnLanguage [] = []
+splitOnLanguage (c:cs) =
+  fmap charLangToTextLang . groupByCharLang $ charToCharLang <$> (c :| cs)
+
+-- | Convert all Latin characters to 'EnglishChar' and all other characters to
+-- 'JapaneseChar'.
+--
+-- >>> charToCharLang 'x'
+-- EnglishChar 'x'
+-- >>> charToCharLang '本'
+-- JapaneseChar '\26412'
+charToCharLang :: Char -> CharLang
+charToCharLang c
+  | isLatin1 c = EnglishChar c
+  | otherwise = JapaneseChar c
+
+-- | Pull out the 'Char' from 'CharLang'.
+--
+-- >>> charLangToChar $ EnglishChar 'x'
+-- 'x'
+-- >>> charLangToChar $ JapaneseChar '本'
+-- '\26412'
+charLangToChar :: CharLang -> Char
+charLangToChar (JapaneseChar c) = c
+charLangToChar (EnglishChar c) = c
+
+-- | Group lists of 'CharLang' based on their language.
+--
+-- Test setup functions:
+--
+-- >>> toCharLang = fmap charToCharLang
+-- >>> fromCharLang = fmap (toList . fmap charLangToChar)
+--
+-- Mix of Japanese and English:
+--
+-- >>> let chars = 'f' :| "ooほげほげbar"
+-- >>> fromCharLang . groupByCharLang $ toCharLang chars
+-- ["foo","\12411\12370\12411\12370","bar"]
+--
+-- Just Japanese:
+--
+-- >>> let chars = 'ヤ' :| "ギ"
+-- >>> fromCharLang . groupByCharLang $ toCharLang chars
+-- ["\12516\12462"]
+--
+-- Just English:
+--
+-- >>> let chars = 'g' :| "oat"
+-- >>> fromCharLang . groupByCharLang $ toCharLang chars
+-- ["goat"]
+groupByCharLang :: NonEmpty CharLang -> [NonEmpty CharLang]
+groupByCharLang = groupBy f
+  where
+    f :: CharLang -> CharLang -> Bool
+    f JapaneseChar{} JapaneseChar{} = True
+    f EnglishChar{} EnglishChar{} = True
+    f _ _ = False
+
+-- | Convert groups of 'CharLang' into a 'TextLang'.  This determines the
+-- 'TextLang' by looking at the very first character in the group of
+-- 'CharLang'.
+--
+-- This function doesn't handle groups of 'CharLang' that are in a different
+-- language.
+--
+-- Group of 'EnglishChar' gets mapped to 'English':
+--
+-- >>> charLangToTextLang $ charToCharLang <$> 'f' :| "oobar"
+-- English "foobar"
+--
+-- Group of 'JapaneseChar' gets mapped to 'Japanese':
+--
+-- >>> charLangToTextLang $ charToCharLang <$> '交' :| "番"
+-- Japanese "\20132\30058"
+charLangToTextLang :: NonEmpty CharLang -> TextLang
+charLangToTextLang cs@(JapaneseChar{} :| _) =
+  Japanese . toList $ charLangToChar <$> cs
+charLangToTextLang cs@(EnglishChar{} :| _) =
+  English . toList $ charLangToChar <$> cs
diff --git a/test/DocTest.hs b/test/DocTest.hs
@@ -0,0 +1,40 @@
+
+module Main (main) where
+
+import Prelude
+
+import Data.Monoid ((<>))
+import System.FilePath.Glob (glob)
+import Test.DocTest (doctest)
+
+main :: IO ()
+main = glob "src/**/*.hs" >>= doDocTest
+
+doDocTest :: [String] -> IO ()
+doDocTest options = doctest $ options <> ghcExtensions
+
+ghcExtensions :: [String]
+ghcExtensions =
+    [
+    --   "-XConstraintKinds"
+    -- , "-XDataKinds"
+    -- ,"-XDeriveDataTypeable"
+    -- , "-XDeriveGeneric"
+    -- , "-XEmptyDataDecls"
+    -- , "-XFlexibleContexts"
+    -- , "-XFlexibleInstances"
+    -- , "-XGADTs"
+    -- , "-XGeneralizedNewtypeDeriving"
+    -- , "-XInstanceSigs"
+    -- , "-XMultiParamTypeClasses"
+    -- , "-XNoImplicitPrelude"
+    -- , "-XOverloadedStrings"
+    -- , "-XPolyKinds"
+    -- , "-XRankNTypes"
+    -- , "-XRecordWildCards"
+    -- , "-XScopedTypeVariables"
+    -- , "-XStandaloneDeriving"
+    -- , "-XTupleSections"
+    -- , "-XTypeFamilies"
+    -- , "-XTypeOperators"
+    ]