fix: fix energy_carbon_hb

akfamily · Jun 9, 2024 · 3244d04 · 3244d04
1 parent 7c802ac
commit 3244d04
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 143 deletions.
diff --git a/akshare/energy/energy_carbon.py b/akshare/energy/energy_carbon.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
 """
-Date: 2022/5/25 16:05
+Date: 2024/6/9 16:00
 Desc: 碳排放交易
 北京市碳排放权电子交易平台-北京市碳排放权公开交易行情
 https://www.bjets.com.cn/article/jyxx/
@@ -18,15 +18,17 @@
 广州碳排放权交易中心-行情信息
 http://www.cnemission.com/article/hqxx/
 """
-import re
+
 from functools import lru_cache
+from io import StringIO
 
 import pandas as pd
 import requests
 from bs4 import BeautifulSoup
 from tqdm import tqdm
 
 from akshare.utils import demjson
+from akshare.utils.cons import headers
 
 
 @lru_cache()
@@ -39,9 +41,7 @@ def energy_carbon_domestic(symbol: str = "湖北") -> pd.DataFrame:
     :return: 行情信息
     :rtype: pandas.DataFrame
     """
-    url = (
-        "http://k.tanjiaoyi.com:8080/KDataController/getHouseDatasInAverage.do"
-    )
+    url = "http://k.tanjiaoyi.com:8080/KDataController/getHouseDatasInAverage.do"
     params = {
         "lcnK": "53f75bfcefff58e4046ccfa42171636c",
         "brand": "TAN",
@@ -69,10 +69,10 @@ def energy_carbon_domestic(symbol: str = "湖北") -> pd.DataFrame:
             "地点",
         ]
     ]
-    temp_df["日期"] = pd.to_datetime(temp_df["日期"]).dt.date
-    temp_df["成交价"] = pd.to_numeric(temp_df["成交价"])
-    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
-    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
+    temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
+    temp_df["成交价"] = pd.to_numeric(temp_df["成交价"], errors="coerce")
+    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
+    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
     return temp_df
 
 
@@ -85,8 +85,8 @@ def energy_carbon_bj() -> pd.DataFrame:
     :rtype: pandas.DataFrame
     """
     url = "https://www.bjets.com.cn/article/jyxx/"
-    r = requests.get(url)
-    soup = BeautifulSoup(r.text, "lxml")
+    r = requests.get(url, verify=False, headers=headers)
+    soup = BeautifulSoup(r.text, features="lxml")
     total_page = (
         soup.find("table")
         .find("script")
@@ -104,10 +104,10 @@ def energy_carbon_bj() -> pd.DataFrame:
         if i == 1:
             i = ""
         url = f"https://www.bjets.com.cn/article/jyxx/?{i}"
-        r = requests.get(url)
+        r = requests.get(url, verify=False, headers=headers)
         r.encoding = "utf-8"
-        df = pd.read_html(r.text)[0]
-        temp_df = pd.concat([temp_df, df], ignore_index=True)
+        df = pd.read_html(StringIO(r.text))[0]
+        temp_df = pd.concat(objs=[temp_df, df], ignore_index=True)
     temp_df.columns = ["日期", "成交量", "成交均价", "成交额"]
     temp_df["成交单位"] = (
         temp_df["成交额"]
@@ -125,12 +125,12 @@ def energy_carbon_bj() -> pd.DataFrame:
         .str.split("（", expand=True)
         .iloc[:, 0]
     )
-    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
-    temp_df["成交均价"] = pd.to_numeric(temp_df["成交均价"])
+    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
+    temp_df["成交均价"] = pd.to_numeric(temp_df["成交均价"], errors="coerce")
     temp_df["成交额"] = temp_df["成交额"].str.replace(",", "")
     temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
-    temp_df["日期"] = pd.to_datetime(temp_df["日期"]).dt.date
-    temp_df.sort_values("日期", inplace=True)
+    temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
+    temp_df.sort_values(by="日期", inplace=True)
     temp_df.reset_index(inplace=True, drop=True)
     return temp_df
 
@@ -144,28 +144,26 @@ def energy_carbon_sz() -> pd.DataFrame:
     :rtype: pandas.DataFrame
     """
     url = "http://www.cerx.cn/dailynewsCN/index.htm"
-    r = requests.get(url)
-    soup = BeautifulSoup(r.text, "lxml")
-    page_num = int(
-        soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text
-    )
-    big_df = pd.read_html(r.text, header=0)[0]
+    r = requests.get(url, headers=headers)
+    soup = BeautifulSoup(r.text, features="lxml")
+    page_num = int(soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text)
+    big_df = pd.read_html(StringIO(r.text), header=0)[0]
     for page in tqdm(
         range(2, page_num + 1), desc="Please wait for a moment", leave=False
     ):
         url = f"http://www.cerx.cn/dailynewsCN/index_{page}.htm"
-        r = requests.get(url)
-        temp_df = pd.read_html(r.text, header=0)[0]
-        big_df = pd.concat([big_df, temp_df], ignore_index=True)
-    big_df["交易日期"] = pd.to_datetime(big_df["交易日期"]).dt.date
-    big_df["开盘价"] = pd.to_numeric(big_df["开盘价"])
-    big_df["最高价"] = pd.to_numeric(big_df["最高价"])
-    big_df["最低价"] = pd.to_numeric(big_df["最低价"])
-    big_df["成交均价"] = pd.to_numeric(big_df["成交均价"])
-    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"])
-    big_df["成交量"] = pd.to_numeric(big_df["成交量"])
-    big_df["成交额"] = pd.to_numeric(big_df["成交额"])
-    big_df.sort_values("交易日期", inplace=True)
+        r = requests.get(url, headers=headers)
+        temp_df = pd.read_html(StringIO(r.text), header=0)[0]
+        big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True)
+    big_df["交易日期"] = pd.to_datetime(big_df["交易日期"], errors="coerce").dt.date
+    big_df["开盘价"] = pd.to_numeric(big_df["开盘价"], errors="coerce")
+    big_df["最高价"] = pd.to_numeric(big_df["最高价"], errors="coerce")
+    big_df["最低价"] = pd.to_numeric(big_df["最低价"], errors="coerce")
+    big_df["成交均价"] = pd.to_numeric(big_df["成交均价"], errors="coerce")
+    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce")
+    big_df["成交量"] = pd.to_numeric(big_df["成交量"], errors="coerce")
+    big_df["成交额"] = pd.to_numeric(big_df["成交额"], errors="coerce")
+    big_df.sort_values(by="交易日期", inplace=True)
     big_df.reset_index(inplace=True, drop=True)
     return big_df
 
@@ -179,28 +177,26 @@ def energy_carbon_eu() -> pd.DataFrame:
     :rtype: pandas.DataFrame
     """
     url = "http://www.cerx.cn/dailynewsOuter/index.htm"
-    r = requests.get(url)
-    soup = BeautifulSoup(r.text, "lxml")
-    page_num = int(
-        soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text
-    )
-    big_df = pd.read_html(r.text, header=0)[0]
+    r = requests.get(url, headers=headers)
+    soup = BeautifulSoup(r.text, features="lxml")
+    page_num = int(soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text)
+    big_df = pd.read_html(StringIO(r.text), header=0)[0]
     for page in tqdm(
         range(2, page_num + 1), desc="Please wait for a moment", leave=False
     ):
         url = f"http://www.cerx.cn/dailynewsOuter/index_{page}.htm"
         r = requests.get(url)
-        temp_df = pd.read_html(r.text, header=0)[0]
-        big_df = pd.concat([big_df, temp_df], ignore_index=True)
-    big_df["交易日期"] = pd.to_datetime(big_df["交易日期"]).dt.date
-    big_df["开盘价"] = pd.to_numeric(big_df["开盘价"])
-    big_df["最高价"] = pd.to_numeric(big_df["最高价"])
-    big_df["最低价"] = pd.to_numeric(big_df["最低价"])
-    big_df["成交均价"] = pd.to_numeric(big_df["成交均价"])
-    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"])
-    big_df["成交量"] = pd.to_numeric(big_df["成交量"])
-    big_df["成交额"] = pd.to_numeric(big_df["成交额"])
-    big_df.sort_values("交易日期", inplace=True)
+        temp_df = pd.read_html(StringIO(r.text), header=0)[0]
+        big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True)
+    big_df["交易日期"] = pd.to_datetime(big_df["交易日期"], errors="coerce").dt.date
+    big_df["开盘价"] = pd.to_numeric(big_df["开盘价"], errors="coerce")
+    big_df["最高价"] = pd.to_numeric(big_df["最高价"], errors="coerce")
+    big_df["最低价"] = pd.to_numeric(big_df["最低价"], errors="coerce")
+    big_df["成交均价"] = pd.to_numeric(big_df["成交均价"], errors="coerce")
+    big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce")
+    big_df["成交量"] = pd.to_numeric(big_df["成交量"], errors="coerce")
+    big_df["成交额"] = pd.to_numeric(big_df["成交额"], errors="coerce")
+    big_df.sort_values(by="交易日期", inplace=True)
     big_df.reset_index(inplace=True, drop=True)
     return big_df
 
@@ -213,63 +209,43 @@ def energy_carbon_hb() -> pd.DataFrame:
     :return: 现货交易数据-配额-每日概况行情数据
     :rtype: pandas.DataFrame
     """
-    url = "http://www.hbets.cn/list/13.html"
-    r = requests.get(url)
-    soup = BeautifulSoup(r.text, "lxml")
-    page_string = (
-        soup.find("div", attrs={"class": "page"}).find_all("span")[-1].text
+    url = "https://www.hbets.cn/"
+    r = requests.get(url, headers=headers)
+    soup = BeautifulSoup(r.text, features="lxml")
+    data_text = (
+        soup.find(name="div", attrs={"class": "threeLeft"}).find_all("script")[1].text
     )
-    page_num = int(re.findall(r"\d+", page_string)[-1])
-    columns = [
-        item.text
-        for item in soup.find("ul", attrs={"class": "title"}).find_all("li")
-    ]
-    big_df = pd.DataFrame()
-    for page in tqdm(
-        range(1, page_num + 1), desc="Please wait for a moment", leave=False
-    ):
-        url = f"http://www.hbets.cn/list/13.html"
-        params = {"page": page}
-        r = requests.get(url, params=params)
-        soup = BeautifulSoup(r.text, "lxml")
-        page_node = [
-            item
-            for item in soup.find(attrs={"class": "future_table"}).find_all(
-                attrs={"class": "cont"}
-            )
+    start_pos = data_text.find("cjj = '[") + 7  # 找到 JSON 数组开始的位置
+    end_pos = data_text.rfind("cjj =") - 31  # 找到 JSON 数组结束的位置
+    from akshare.utils import demjson
+
+    data_json = demjson.decode(data_text[start_pos:end_pos])
+    temp_df = pd.DataFrame.from_dict(data_json)
+    temp_df.rename(
+        columns={
+            "riqi": "日期",
+            "cjj": "成交价",
+            "cjl": "成交量",
+            "zx": "最新",
+            "zd": "涨跌",
+        },
+        inplace=True,
+    )
+    temp_df = temp_df[
+        [
+            "日期",
+            "成交价",
+            "成交量",
+            "最新",
+            "涨跌",
         ]
-        temp_list = []
-        for item in page_node:
-            temp_inner_list = []
-            for inner_item in item.find_all("li"):
-                temp_inner_list.append(inner_item.text)
-            temp_list.append(temp_inner_list)
-        temp_df = pd.DataFrame(temp_list)
-        big_df = pd.concat([big_df, temp_df], ignore_index=True)
-    big_df.columns = columns
-    big_df["交易品种"] = big_df["交易品种"].str.strip()
-    big_df["日期"] = pd.to_datetime(big_df["日期"]).dt.date
-    big_df["最新"] = pd.to_numeric(big_df["最新"])
-    big_df["涨跌幅"] = big_df["涨跌幅"].str.strip("%").str.strip()
-    big_df["涨跌幅"] = big_df["涨跌幅"].str.strip("%")
-    big_df["涨跌幅"] = pd.to_numeric(big_df["涨跌幅"])
-    big_df["最高"] = big_df["最高"].str.replace("--", "")
-    big_df["最高"] = pd.to_numeric(big_df["最高"])
-    big_df["最低"] = big_df["最低"].str.replace("--", "")
-    big_df["最低"] = pd.to_numeric(big_df["最低"])
-    big_df["成交量"] = big_df["成交量"].str.replace("--", "")
-    big_df["成交量"] = pd.to_numeric(big_df["成交量"])
-    big_df["成交额"] = big_df["成交额"].str.replace("--", "")
-    big_df["成交额"] = pd.to_numeric(big_df["成交额"])
-    big_df["昨收盘价"] = big_df["昨收盘价"].str.replace("--", "")
-    big_df["昨收盘价"] = pd.to_numeric(big_df["昨收盘价"])
-    big_df.dropna(subset=["最新"], inplace=True)
-    big_df.sort_values("日期", inplace=True)
-    big_df = big_df[
-        ["日期", "交易品种", "最新", "涨跌幅", "最高", "最低", "成交量", "成交额", "昨收盘价"]
     ]
-    big_df.reset_index(inplace=True, drop=True)
-    return big_df
+    temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
+    temp_df["成交价"] = pd.to_numeric(temp_df["成交价"], errors="coerce")
+    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
+    temp_df["最新"] = pd.to_numeric(temp_df["最新"], errors="coerce")
+    temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"], errors="coerce")
+    return temp_df
 
 
 @lru_cache()
@@ -287,7 +263,7 @@ def energy_carbon_gz() -> pd.DataFrame:
         "endTime": "2030-09-12",
     }
     r = requests.get(url, params=params)
-    temp_df = pd.read_html(r.text, header=0)[1]
+    temp_df = pd.read_html(StringIO(r.text), header=0)[1]
     temp_df.columns = [
         "日期",
         "品种",
@@ -300,17 +276,19 @@ def energy_carbon_gz() -> pd.DataFrame:
         "成交数量",
         "成交金额",
     ]
-    temp_df["日期"] = pd.to_datetime(temp_df["日期"], format="%Y%m%d").dt.date
-    temp_df["开盘价"] = pd.to_numeric(temp_df["开盘价"])
-    temp_df["收盘价"] = pd.to_numeric(temp_df["收盘价"])
-    temp_df["最高价"] = pd.to_numeric(temp_df["最高价"])
-    temp_df["最低价"] = pd.to_numeric(temp_df["最低价"])
-    temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"])
+    temp_df["日期"] = pd.to_datetime(
+        temp_df["日期"], format="%Y%m%d", errors="coerce"
+    ).dt.date
+    temp_df["开盘价"] = pd.to_numeric(temp_df["开盘价"], errors="coerce")
+    temp_df["收盘价"] = pd.to_numeric(temp_df["收盘价"], errors="coerce")
+    temp_df["最高价"] = pd.to_numeric(temp_df["最高价"], errors="coerce")
+    temp_df["最低价"] = pd.to_numeric(temp_df["最低价"], errors="coerce")
+    temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"], errors="coerce")
     temp_df["涨跌幅"] = temp_df["涨跌幅"].str.strip("%")
-    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"])
-    temp_df["成交数量"] = pd.to_numeric(temp_df["成交数量"])
-    temp_df["成交金额"] = pd.to_numeric(temp_df["成交金额"])
-    temp_df.sort_values("日期", inplace=True)
+    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
+    temp_df["成交数量"] = pd.to_numeric(temp_df["成交数量"], errors="coerce")
+    temp_df["成交金额"] = pd.to_numeric(temp_df["成交金额"], errors="coerce")
+    temp_df.sort_values(by="日期", inplace=True)
     temp_df.reset_index(inplace=True, drop=True)
     return temp_df
 

diff --git a/docs/data/energy/energy.md b/docs/data/energy/energy.md
@@ -226,9 +226,9 @@ print(energy_carbon_eu_df)
 
 目标地址: https://www.hbets.cn/
 
-描述: 湖北碳排放权交易中心-现货交易数据-配额-每日概况
+描述: 湖北碳排放权交易中心-碳排放权交易数据
 
-限量: 返回从 2017-04-05 至今的所有历史数据
+限量: 返回从 2014-04-02 至今的所有历史数据
 
 输入参数
 
@@ -238,17 +238,13 @@ print(energy_carbon_eu_df)
 
 输出参数
 
-| 名称   | 类型      | 描述      |
-|------|---------|---------|
-| 日期   | object  | -       |
-| 交易品种 | object  | -       |
-| 最新   | float64 | -       |
-| 涨跌幅  | float64 | 注意单位: % |
-| 最高   | float64 | -       |
-| 最低   | float64 | -       |
-| 成交量  | float64 | -       |
-| 成交额  | float64 | -       |
-| 昨收盘价 | float64 | -       |
+| 名称  | 类型      | 描述 |
+|-----|---------|----|
+| 日期  | object  | -  |
+| 成交价 | float64 | -  |
+| 成交量 | float64 | -  |
+| 最新  | float64 | -  |
+| 涨跌  | float64 | -  |
 
 接口示例
 
@@ -262,18 +258,19 @@ print(energy_carbon_hb_df)
 数据示例
 
 ```
-          日期  交易品种   最新  涨跌幅  最高   最低     成交量      成交额   昨收盘价
-0     2017-04-05  HBEA  16.55 -0.30  17.90  16.50   9382.0   167152.08    NaN
-1     2017-04-06  HBEA  16.55  0.00  16.55  15.50  11126.0   179145.25  16.55
-2     2017-04-07  HBEA  16.03 -3.14  17.00  16.01  38449.0   637564.37  16.55
-3     2017-04-10  HBEA  16.00 -0.19  16.48  16.00  11418.0   184092.65  16.03
-4     2017-04-11  HBEA  15.89 -0.69  16.19  15.51  34554.0   551255.02  16.00
-          ...   ...    ...   ...    ...    ...      ...         ...    ...
-1142  2022-02-28  HBEA  51.48 -0.41  54.00  50.00  14478.0   742153.94  51.69
-1143  2022-03-01  HBEA  50.50 -1.90  53.00  49.30  16130.0   811078.70  51.48
-1144  2022-03-02  HBEA  50.54  0.08  51.95  47.51  11256.0   564317.92  50.50
-1145  2022-03-03  HBEA  49.11 -2.83  51.90  48.01  31508.0  1550420.55  50.54
-1146  2022-03-04  HBEA  49.30  0.39  50.50  47.00  11774.0   565137.36  49.11
+          日期    成交价    成交量   最新   涨跌
+0     2014-04-02  21.00  510020.0  0.0  0.0
+1     2014-04-03  24.20   51468.0  0.0  0.0
+2     2014-04-04  26.61  304125.0  0.0  0.0
+3     2014-04-08  26.57  112057.0  0.0  0.0
+4     2014-04-09  25.07   77473.0  0.0  0.0
+...          ...    ...       ...  ...  ...
+2433  2024-06-03  41.72    1314.0  0.0  0.0
+2434  2024-06-04  42.01    3260.0  0.0  0.0
+2435  2024-06-05  42.09    7031.0  0.0  0.0
+2436  2024-06-06  41.97    3691.0  0.0  0.0
+2437  2024-06-07  42.41   17613.0  0.0  0.0
+[2438 rows x 5 columns]
 ```
 
 #### 碳排放权-广州