diff --git a/.hlint.yaml b/.hlint.yaml
Binary files a/.hlint.yaml and b/.hlint.yaml differ
diff --git a/.stylish-haskell.yaml b/.stylish-haskell.yaml
--- a/.stylish-haskell.yaml
+++ b/.stylish-haskell.yaml
@@ -57,43 +57,43 @@ steps:
# - "," in fields is always aligned with "{"
# - "}" is likewise always aligned with "{"
- # - records:
+ - records:
# # How to format equals sign between type constructor and data constructor.
# # Possible values:
# # - "same_line" -- leave "=" AND data constructor on the same line as the type constructor.
# # - "indent N" -- insert a new line and N spaces from the beginning of the next line.
- # equals: "indent 2"
+ equals: "indent 2"
# # How to format first field of each record constructor.
# # Possible values:
# # - "same_line" -- "{" and first field goes on the same line as the data constructor.
# # - "indent N" -- insert a new line and N spaces from the beginning of the data constructor
- # first_field: "indent 2"
+ first_field: "indent 2"
# # How many spaces to insert between the column with "," and the beginning of the comment in the next line.
- # field_comment: 2
+ field_comment: 2
# # How many spaces to insert before "deriving" clause. Deriving clauses are always on separate lines.
- # deriving: 2
+ deriving: 2
# # How many spaces to insert before "via" clause counted from indentation of deriving clause
# # Possible values:
# # - "same_line" -- "via" part goes on the same line as "deriving" keyword.
# # - "indent N" -- insert a new line and N spaces from the beginning of "deriving" keyword.
- # via: "indent 2"
+ via: "indent 2"
# # Sort typeclass names in the "deriving" list alphabetically.
- # sort_deriving: true
+ sort_deriving: true
# # Wheter or not to break enums onto several lines
# #
# # Default: false
- # break_enums: false
+ break_enums: false
# # Whether or not to break single constructor data types before `=` sign
# #
# # Default: true
- # break_single_constructors: true
+ break_single_constructors: true
# # Whether or not to curry constraints on function.
# #
@@ -102,7 +102,7 @@ steps:
# # Instead of @allValues :: (Enum a, Bounded a) => Proxy a -> [a]@
# #
# # Default: false
- # curried_context: false
+ curried_context: true
# Align the right hand side of some elements. This is quite conservative
# and only applies to statements where each element occupies a single
@@ -240,7 +240,7 @@ steps:
# Useful for 'file' and 'group' align settings.
# Default: 4
- list_padding: 4
+ list_padding: 2
# Separate lists option affects formatting of import list for type
# or class. The only difference is single space between type and list
@@ -330,10 +330,10 @@ steps:
language_prefix: LANGUAGE
# Replace tabs by spaces. This is disabled by default.
- # - tabs:
- # # Number of spaces to use for each tab. Default: 8, as specified by the
- # # Haskell report.
- # spaces: 8
+ - tabs:
+ # Number of spaces to use for each tab. Default: 8, as specified by the
+ # Haskell report.
+ spaces: 2
# Remove trailing whitespace
- trailing_whitespace: {}
@@ -341,7 +341,7 @@ steps:
# Squash multiple spaces between the left and right hand sides of some
# elements into single spaces. Basically, this undoes the effect of
# simple_align but is a bit less conservative.
- # - squash: {}
+ # - squash: {}
# A common setting is the number of columns (parts of) code will be wrapped
# to. Different steps take this into account.
diff --git a/hie.yaml b/hie.yaml
Binary files a/hie.yaml and b/hie.yaml differ
diff --git a/packages/grappler/CHANGELOG.md b/packages/grappler/CHANGELOG.md
--- /dev/null
+++ b/packages/grappler/CHANGELOG.md
@@ -0,0 +1,6 @@
+# grappler
+- init package
\ No newline at end of file
diff --git a/packages/grappler/LICENSE b/packages/grappler/LICENSE
--- /dev/null
+++ b/packages/grappler/LICENSE
@@ -0,0 +1,15 @@
+Copyright (C) 2021 Cigaret
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
\ No newline at end of file
diff --git a/packages/grappler/README.md b/packages/grappler/README.md
--- /dev/null
+++ b/packages/grappler/README.md
@@ -0,0 +1,12 @@
+# grappler
+- [首页 > 政策 > 最新](http://www.gov.cn/zhengce/zuixin.htm)
+ 可能在域名解析层面设置了访问限制,直接访问 IP 地址可以不设代理拿到数据([IP 地址](。
+- [太原市人民政府 > 政府信息公开 > 法定主动公开内容 > 国民经济和社会发展规划](http://www.taiyuan.gov.cn/fzlm/gkmlpt/zdgk/index.shtml?chan=25)
+ 很奇怪,这个没有访问限制,但数据是动态加载的,需要直接请求接口。
+ [IP 地址](。
+ [数据地址](http://taiyuan.gov.cn/intertidwebapp/govChanInfo/getDocuments?Index=1&pageSize=20&siteId=1&ChannelType=1&KeyWord=&KeyWordType=&chanId=25&order=1)。
diff --git a/packages/grappler/Setup.hs b/packages/grappler/Setup.hs
--- /dev/null
+++ b/packages/grappler/Setup.hs
@@ -0,0 +1,3 @@
+import Distribution.Simple
+-- | Build-script entry point: hands control to 'defaultMain' from "Distribution.Simple".
+main :: IO ()
+main = defaultMain
\ No newline at end of file
diff --git a/packages/grappler/app/Gov/Taiyuan.hs b/packages/grappler/app/Gov/Taiyuan.hs
--- /dev/null
+++ b/packages/grappler/app/Gov/Taiyuan.hs
@@ -0,0 +1,41 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Gov.Taiyuan where
+import qualified Data.ByteString.Lazy.UTF8 as UTF8
+import Data.Foldable
+import Network.HTTP.Client
+import Network.HTTP.Types.Header
+-- | A URL kept as a plain 'String'.
+type UrlString = String
+-- | A list of 'UrlString' values.
+type TargetUrls = [UrlString]
+-- | Known endpoint URLs; currently a single @getDocuments@ query on taiyuan.gov.cn.
+targetUrls :: TargetUrls
+targetUrls =
+  [ "http://taiyuan.gov.cn/intertidwebapp/govChanInfo/getDocuments?Index=1&pageSize=20&siteId=1&ChannelType=1&KeyWord=&KeyWordType=&chanId=25&order=1"
+  ]
+-- | Add the shared proxy configuration to the given manager settings.
+--
+-- The proxy is installed with 'useProxy' through 'managerSetProxy'.
+-- NOTE(review): the proxy host is an empty string here — presumably a
+-- placeholder that has to be filled in before this is used; confirm.
+setCommonProxy :: ManagerSettings -> ManagerSettings
+setCommonProxy = managerSetProxy (useProxy commonProxy)
+  where
+    commonProxy =
+      Proxy
+        { proxyHost = ""
+        , proxyPort = 10809
+        }
+-- | Apply the shared manager settings: the response timeout is set to
+-- 30,000,000 microseconds (30 seconds); everything else is left as given.
+setCommonManager :: ManagerSettings -> ManagerSettings
+setCommonManager base =
+  base { managerResponseTimeout = responseTimeoutMicro timeoutMicros }
+  where
+    timeoutMicros = 30000000
+-- | Build one fetch action per URL.
+--
+-- Each action creates its own 'Manager' ('defaultManagerSettings' adjusted by
+-- 'setCommonManager'), parses the URL, issues a GET request carrying a
+-- browser-like User-Agent header, and fetches the response with 'httpLbs'.
+-- Nothing is sent until the returned actions are actually run.
+--
+-- Every action yields the original URL, the request that was sent, and the
+-- response. Per the http-client documentation, 'parseRequest' and 'httpLbs'
+-- can throw (for example on a malformed URL or a failed connection); those
+-- exceptions are not caught here.
+prepareRequests :: TargetUrls -> [IO (UrlString, Request, Response UTF8.ByteString)]
+prepareRequests = map fetch
+  where
+    fetch url = do
+      manager <- newManager $ setCommonManager defaultManagerSettings
+      initRequest <- parseRequest url
+      let request =
+            initRequest
+              { method = "GET"
+              , requestHeaders =
+                  [ -- The data is also returned when no User-Agent is set.
+                    (hUserAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.38")
+                  ]
+              }
+      response <- httpLbs request manager
+      return (url, request, response)
diff --git a/packages/grappler/app/Gov/Top.hs b/packages/grappler/app/Gov/Top.hs
--- /dev/null
+++ b/packages/grappler/app/Gov/Top.hs
@@ -0,0 +1,132 @@
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Gov.Top
+ ( grap
+ ) where
+import Control.Concurrent
+import Control.Monad
+import qualified Data.ByteString.Lazy.UTF8 as UTF8
+import Data.Char
+import Network.HTTP.Client
+import Network.HTTP.Types.Header
+import Text.HTML.TagSoup
+-- | Name given to a target, kept as a plain 'String'.
+type Name = String
+-- | A URL kept as a plain 'String'.
+type UrlString = String
+-- | One thing to fetch: a name plus the URL to request.
+data Target
+ = Target
+ { name :: Name
+ , url :: UrlString
+ }
+ deriving (Show)
+-- | A list of 'Target' values.
+type Targets = [Target]
+-- | The targets requested by 'execRequest'; at present only the "Zhengce"
+-- page on www.gov.cn.
+targets :: Targets
+targets =
+  [ Target
+      { name = "Zhengce"
+      , url = "http://www.gov.cn/zhengce/zuixin.htm"
+      }
+  ]
+-- | Add the shared proxy configuration to the given manager settings.
+--
+-- The proxy is installed with 'useProxy' through 'managerSetProxy'.
+-- NOTE(review): the proxy host is an empty string here — presumably a
+-- placeholder that has to be filled in before this is used; confirm.
+setCommonProxy :: ManagerSettings -> ManagerSettings
+setCommonProxy =
+  let commonProxy = Proxy { proxyHost = "", proxyPort = 10809 }
+   in managerSetProxy (useProxy commonProxy)
+-- | Apply the shared manager settings: the response timeout is set to
+-- 30 seconds, expressed in microseconds; everything else is left as given.
+setCommonManager :: ManagerSettings -> ManagerSettings
+setCommonManager base =
+  base { managerResponseTimeout = responseTimeoutMicro thirtySeconds }
+  where
+    thirtySeconds = 30 * 1000000
+-- | A 'Response' whose body is a lazy 'UTF8.ByteString'.
+type RequestResponse = Response UTF8.ByteString
+-- | What is kept about one fetch: the target it was made for, the request
+-- that was sent, and the response that came back.
+data RequestResult
+ = RequestResult
+ { target :: Target
+ , request :: Request
+ , response :: RequestResponse
+ }
+ deriving (Show)
+-- | Build one fetch action per target.
+--
+-- Each action creates its own 'Manager' ('defaultManagerSettings' adjusted by
+-- 'setCommonManager'), parses the target's URL, issues a GET request carrying
+-- a browser-like User-Agent header, and fetches the response with 'httpLbs'.
+-- Nothing is sent until the returned actions are actually run.
+prepareRequests :: [Target] -> [IO RequestResult]
+prepareRequests = map fetch
+  where
+    fetch tgt = do
+      mgr <- newManager (setCommonManager defaultManagerSettings)
+      baseReq <- parseRequest (url tgt)
+      let req =
+            baseReq
+              { method = "GET"
+              , requestHeaders =
+                  [ -- The data is also returned when no User-Agent is set.
+                    (hUserAgent, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36 Edg/93.0.961.38")
+                  ]
+              }
+      resp <- httpLbs req mgr
+      pure (RequestResult tgt req resp)
+-- | Run every prepared request for 'targets', then report on and store each result.
+--
+-- All requests are performed first; afterwards, for each result, the target
+-- URL, the request, and the response status are printed, the body is decoded
+-- with 'UTF8.toString', and the decoded text is written to
+-- @./packages/grappler/data/TopGov.txt@ (relative to the working directory).
+--
+-- Returns, per target, the target, the request, the response, and the decoded body.
+--
+-- NOTE(review): every result is written to the same file path, so with more
+-- than one target a later write replaces an earlier one.
+execRequest :: IO [(Target, Request, RequestResponse, String)]
+execRequest = do
+  results <- sequenceA (prepareRequests targets)
+  mapM report results
+  where
+    report (RequestResult tgt req resp) = do
+      putStrLn ("The target url was:" ++ url tgt)
+      putStrLn ("The request info was:" ++ show req)
+      putStrLn ("The response status was: " ++ show (responseStatus resp))
+      let body = UTF8.toString (responseBody resp)
+      writeFile ("./packages/grappler/data/" ++ "TopGov" ++ ".txt") body
+      putStrLn "Done"
+      return (tgt, req, resp, body)
+-- | Turn an HTML document into a list of TagSoup tags; a thin wrapper over 'parseTags'.
+parseBody :: String -> [Tag String]
+parseBody html = parseTags html
+extractList :: [Tag String] -> [[Tag String]]
+extractList =
+ map (
+ concat .
+ (\tags -> [
+ (take 2 . dropWhile (~/= ("" :: String))) tags,
+ (take 1 . drop 1 . dropWhile (~/= TagOpen "span" [("class" :: String,"date")])) tags
+ ]) .
+ takeWhile (~/= ("" :: String))
+ ) .
+ sections (~== ("