-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathminify.hs
executable file
·106 lines (90 loc) · 3.22 KB
/
minify.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env runhaskell
-- fgaz's minifier
--
-- Copyright Francesco Gazzetta
-- SPDX-License-Identifier: EUPL-1.2
--
-- Like the other one, it needs explicit block syntax (semicolons and braces).
-- NOTE: It will split string literals containing spaces.
import Data.List (isPrefixOf, foldl')
import Data.Char (isSpace, isAlphaNum, isAscii)
import Data.Function (on)
main :: IO ()
main = interact $ \text ->
let (shebang, program) =
if "#!" `isPrefixOf` text
then (Just $ takeWhile (/='\n') text, dropWhile (/='\n') text)
else (Nothing, text)
in maybe "" (<>"\n") shebang <> minify program <> "\n"
minify :: String -> String
minify = mkLines
. filter (not . isSpace . head)
. groupBy' (not .: canTouch `on` characterClass)
. unlines
. filter (not . isExtra)
. lines
-- NOTE: tweak as needed
isExtra :: String -> Bool
isExtra s = any ($ s)
[ isComment
, isType
, isSignature
]
isComment :: String -> Bool
isComment = isPrefixOf "--" . dropWhile isSpace
isType :: String -> Bool
isType = isPrefixOf "type"
isSignature :: String -> Bool
isSignature = elem "::" . words
-- | Basically
-- unwords . fmap concat . groupBy' (canTouch `on` (characterClass . head))
-- but splits the output into lines.
-- O(n^2) due to '<>' and 'last' in 'addToken', but inputs are going to be short
-- anyway. For now at least.
--
-- TODO make it stream
mkLines :: [String] -> String
mkLines = fst . foldl' addToken ("", 0)
addToken :: (String, Int) -> String -> (String, Int)
addToken (str, lineLen) token
| lineLen + length spacedToken > 80 =
if token `elem` [";", "{"]
then (str <> "\n", 0)
else (str <> "\n " <> token, length token + 1)
| otherwise = (str <> spacedToken, lineLen + length spacedToken)
where spacedToken
| null str || null token
|| (canTouch `on` characterClass) (last str) (head token)
= token
| otherwise = " " <> token
data CharacterClass = IdentifierOrLit | Operator | Dot | Special deriving Eq
characterClass :: Char -> CharacterClass
characterClass '.' = Dot
characterClass c | isAlphaNum c = IdentifierOrLit
| c `elem` "\"'_" = IdentifierOrLit
| c `elem` "!#$%&*+/<=>?@\\^|-~:" = Operator
| not $ isAscii c = Operator
| c `elem` "[](),;{}" = Special
| isSpace c = Special
characterClass c = error $ "Unknown character: " ++ show c
-- Check if splitting or joining the characters does not change their meaning
canTouch :: CharacterClass -> CharacterClass -> Bool
canTouch Special _ = True
canTouch _ Special = True
-- . is both used for operators and for qualified names
canTouch Dot _ = False
canTouch _ Dot = False
canTouch a b = a /= b
-- Utilities
------------
(.:) :: (b -> c) -> (a1 -> a2 -> b) -> a1 -> a2 -> c
(.:) = (.).(.)
-- | Like groupBy, but equality is not transitive
groupBy' :: (a -> a -> Bool) -> [a] -> [[a]]
groupBy' _ [] = []
groupBy' eq (x:xs) | all (eq x) (take 1 xs) =
let (gr, grs) = case groupBy' eq xs of
gr':grs' -> (gr', grs')
[] -> ([], [])
in (x : gr) : grs
| otherwise = [x] : groupBy' eq xs