Skip to content

Commit

Permalink
fix: fix energy_carbon_hb
Browse files Browse the repository at this point in the history
  • Loading branch information
albertandking committed Jun 9, 2024
1 parent 7c802ac commit 3244d04
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 143 deletions.
214 changes: 96 additions & 118 deletions akshare/energy/energy_carbon.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Date: 2022/5/25 16:05
Date: 2024/6/9 16:00
Desc: 碳排放交易
北京市碳排放权电子交易平台-北京市碳排放权公开交易行情
https://www.bjets.com.cn/article/jyxx/
Expand All @@ -18,15 +18,17 @@
广州碳排放权交易中心-行情信息
http://www.cnemission.com/article/hqxx/
"""
import re

from functools import lru_cache
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

from akshare.utils import demjson
from akshare.utils.cons import headers


@lru_cache()
Expand All @@ -39,9 +41,7 @@ def energy_carbon_domestic(symbol: str = "湖北") -> pd.DataFrame:
:return: 行情信息
:rtype: pandas.DataFrame
"""
url = (
"http://k.tanjiaoyi.com:8080/KDataController/getHouseDatasInAverage.do"
)
url = "http://k.tanjiaoyi.com:8080/KDataController/getHouseDatasInAverage.do"
params = {
"lcnK": "53f75bfcefff58e4046ccfa42171636c",
"brand": "TAN",
Expand Down Expand Up @@ -69,10 +69,10 @@ def energy_carbon_domestic(symbol: str = "湖北") -> pd.DataFrame:
"地点",
]
]
temp_df["日期"] = pd.to_datetime(temp_df["日期"]).dt.date
temp_df["成交价"] = pd.to_numeric(temp_df["成交价"])
temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
temp_df["成交价"] = pd.to_numeric(temp_df["成交价"], errors="coerce")
temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
return temp_df


Expand All @@ -85,8 +85,8 @@ def energy_carbon_bj() -> pd.DataFrame:
:rtype: pandas.DataFrame
"""
url = "https://www.bjets.com.cn/article/jyxx/"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
r = requests.get(url, verify=False, headers=headers)
soup = BeautifulSoup(r.text, features="lxml")
total_page = (
soup.find("table")
.find("script")
Expand All @@ -104,10 +104,10 @@ def energy_carbon_bj() -> pd.DataFrame:
if i == 1:
i = ""
url = f"https://www.bjets.com.cn/article/jyxx/?{i}"
r = requests.get(url)
r = requests.get(url, verify=False, headers=headers)
r.encoding = "utf-8"
df = pd.read_html(r.text)[0]
temp_df = pd.concat([temp_df, df], ignore_index=True)
df = pd.read_html(StringIO(r.text))[0]
temp_df = pd.concat(objs=[temp_df, df], ignore_index=True)
temp_df.columns = ["日期", "成交量", "成交均价", "成交额"]
temp_df["成交单位"] = (
temp_df["成交额"]
Expand All @@ -125,12 +125,12 @@ def energy_carbon_bj() -> pd.DataFrame:
.str.split("(", expand=True)
.iloc[:, 0]
)
temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
temp_df["成交均价"] = pd.to_numeric(temp_df["成交均价"])
temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
temp_df["成交均价"] = pd.to_numeric(temp_df["成交均价"], errors="coerce")
temp_df["成交额"] = temp_df["成交额"].str.replace(",", "")
temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
temp_df["日期"] = pd.to_datetime(temp_df["日期"]).dt.date
temp_df.sort_values("日期", inplace=True)
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
temp_df.sort_values(by="日期", inplace=True)
temp_df.reset_index(inplace=True, drop=True)
return temp_df

Expand All @@ -144,28 +144,26 @@ def energy_carbon_sz() -> pd.DataFrame:
:rtype: pandas.DataFrame
"""
url = "http://www.cerx.cn/dailynewsCN/index.htm"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
page_num = int(
soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text
)
big_df = pd.read_html(r.text, header=0)[0]
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, features="lxml")
page_num = int(soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text)
big_df = pd.read_html(StringIO(r.text), header=0)[0]
for page in tqdm(
range(2, page_num + 1), desc="Please wait for a moment", leave=False
):
url = f"http://www.cerx.cn/dailynewsCN/index_{page}.htm"
r = requests.get(url)
temp_df = pd.read_html(r.text, header=0)[0]
big_df = pd.concat([big_df, temp_df], ignore_index=True)
big_df["交易日期"] = pd.to_datetime(big_df["交易日期"]).dt.date
big_df["开盘价"] = pd.to_numeric(big_df["开盘价"])
big_df["最高价"] = pd.to_numeric(big_df["最高价"])
big_df["最低价"] = pd.to_numeric(big_df["最低价"])
big_df["成交均价"] = pd.to_numeric(big_df["成交均价"])
big_df["收盘价"] = pd.to_numeric(big_df["收盘价"])
big_df["成交量"] = pd.to_numeric(big_df["成交量"])
big_df["成交额"] = pd.to_numeric(big_df["成交额"])
big_df.sort_values("交易日期", inplace=True)
r = requests.get(url, headers=headers)
temp_df = pd.read_html(StringIO(r.text), header=0)[0]
big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True)
big_df["交易日期"] = pd.to_datetime(big_df["交易日期"], errors="coerce").dt.date
big_df["开盘价"] = pd.to_numeric(big_df["开盘价"], errors="coerce")
big_df["最高价"] = pd.to_numeric(big_df["最高价"], errors="coerce")
big_df["最低价"] = pd.to_numeric(big_df["最低价"], errors="coerce")
big_df["成交均价"] = pd.to_numeric(big_df["成交均价"], errors="coerce")
big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce")
big_df["成交量"] = pd.to_numeric(big_df["成交量"], errors="coerce")
big_df["成交额"] = pd.to_numeric(big_df["成交额"], errors="coerce")
big_df.sort_values(by="交易日期", inplace=True)
big_df.reset_index(inplace=True, drop=True)
return big_df

Expand All @@ -179,28 +177,26 @@ def energy_carbon_eu() -> pd.DataFrame:
:rtype: pandas.DataFrame
"""
url = "http://www.cerx.cn/dailynewsOuter/index.htm"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
page_num = int(
soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text
)
big_df = pd.read_html(r.text, header=0)[0]
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, features="lxml")
page_num = int(soup.find(attrs={"class": "pagebar"}).find_all("option")[-1].text)
big_df = pd.read_html(StringIO(r.text), header=0)[0]
for page in tqdm(
range(2, page_num + 1), desc="Please wait for a moment", leave=False
):
url = f"http://www.cerx.cn/dailynewsOuter/index_{page}.htm"
r = requests.get(url)
temp_df = pd.read_html(r.text, header=0)[0]
big_df = pd.concat([big_df, temp_df], ignore_index=True)
big_df["交易日期"] = pd.to_datetime(big_df["交易日期"]).dt.date
big_df["开盘价"] = pd.to_numeric(big_df["开盘价"])
big_df["最高价"] = pd.to_numeric(big_df["最高价"])
big_df["最低价"] = pd.to_numeric(big_df["最低价"])
big_df["成交均价"] = pd.to_numeric(big_df["成交均价"])
big_df["收盘价"] = pd.to_numeric(big_df["收盘价"])
big_df["成交量"] = pd.to_numeric(big_df["成交量"])
big_df["成交额"] = pd.to_numeric(big_df["成交额"])
big_df.sort_values("交易日期", inplace=True)
temp_df = pd.read_html(StringIO(r.text), header=0)[0]
big_df = pd.concat(objs=[big_df, temp_df], ignore_index=True)
big_df["交易日期"] = pd.to_datetime(big_df["交易日期"], errors="coerce").dt.date
big_df["开盘价"] = pd.to_numeric(big_df["开盘价"], errors="coerce")
big_df["最高价"] = pd.to_numeric(big_df["最高价"], errors="coerce")
big_df["最低价"] = pd.to_numeric(big_df["最低价"], errors="coerce")
big_df["成交均价"] = pd.to_numeric(big_df["成交均价"], errors="coerce")
big_df["收盘价"] = pd.to_numeric(big_df["收盘价"], errors="coerce")
big_df["成交量"] = pd.to_numeric(big_df["成交量"], errors="coerce")
big_df["成交额"] = pd.to_numeric(big_df["成交额"], errors="coerce")
big_df.sort_values(by="交易日期", inplace=True)
big_df.reset_index(inplace=True, drop=True)
return big_df

Expand All @@ -213,63 +209,43 @@ def energy_carbon_hb() -> pd.DataFrame:
:return: 现货交易数据-配额-每日概况行情数据
:rtype: pandas.DataFrame
"""
url = "http://www.hbets.cn/list/13.html"
r = requests.get(url)
soup = BeautifulSoup(r.text, "lxml")
page_string = (
soup.find("div", attrs={"class": "page"}).find_all("span")[-1].text
url = "https://www.hbets.cn/"
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, features="lxml")
data_text = (
soup.find(name="div", attrs={"class": "threeLeft"}).find_all("script")[1].text
)
page_num = int(re.findall(r"\d+", page_string)[-1])
columns = [
item.text
for item in soup.find("ul", attrs={"class": "title"}).find_all("li")
]
big_df = pd.DataFrame()
for page in tqdm(
range(1, page_num + 1), desc="Please wait for a moment", leave=False
):
url = f"http://www.hbets.cn/list/13.html"
params = {"page": page}
r = requests.get(url, params=params)
soup = BeautifulSoup(r.text, "lxml")
page_node = [
item
for item in soup.find(attrs={"class": "future_table"}).find_all(
attrs={"class": "cont"}
)
start_pos = data_text.find("cjj = '[") + 7 # 找到 JSON 数组开始的位置
end_pos = data_text.rfind("cjj =") - 31 # 找到 JSON 数组结束的位置
from akshare.utils import demjson

data_json = demjson.decode(data_text[start_pos:end_pos])
temp_df = pd.DataFrame.from_dict(data_json)
temp_df.rename(
columns={
"riqi": "日期",
"cjj": "成交价",
"cjl": "成交量",
"zx": "最新",
"zd": "涨跌",
},
inplace=True,
)
temp_df = temp_df[
[
"日期",
"成交价",
"成交量",
"最新",
"涨跌",
]
temp_list = []
for item in page_node:
temp_inner_list = []
for inner_item in item.find_all("li"):
temp_inner_list.append(inner_item.text)
temp_list.append(temp_inner_list)
temp_df = pd.DataFrame(temp_list)
big_df = pd.concat([big_df, temp_df], ignore_index=True)
big_df.columns = columns
big_df["交易品种"] = big_df["交易品种"].str.strip()
big_df["日期"] = pd.to_datetime(big_df["日期"]).dt.date
big_df["最新"] = pd.to_numeric(big_df["最新"])
big_df["涨跌幅"] = big_df["涨跌幅"].str.strip("%").str.strip()
big_df["涨跌幅"] = big_df["涨跌幅"].str.strip("%")
big_df["涨跌幅"] = pd.to_numeric(big_df["涨跌幅"])
big_df["最高"] = big_df["最高"].str.replace("--", "")
big_df["最高"] = pd.to_numeric(big_df["最高"])
big_df["最低"] = big_df["最低"].str.replace("--", "")
big_df["最低"] = pd.to_numeric(big_df["最低"])
big_df["成交量"] = big_df["成交量"].str.replace("--", "")
big_df["成交量"] = pd.to_numeric(big_df["成交量"])
big_df["成交额"] = big_df["成交额"].str.replace("--", "")
big_df["成交额"] = pd.to_numeric(big_df["成交额"])
big_df["昨收盘价"] = big_df["昨收盘价"].str.replace("--", "")
big_df["昨收盘价"] = pd.to_numeric(big_df["昨收盘价"])
big_df.dropna(subset=["最新"], inplace=True)
big_df.sort_values("日期", inplace=True)
big_df = big_df[
["日期", "交易品种", "最新", "涨跌幅", "最高", "最低", "成交量", "成交额", "昨收盘价"]
]
big_df.reset_index(inplace=True, drop=True)
return big_df
temp_df["日期"] = pd.to_datetime(temp_df["日期"], errors="coerce").dt.date
temp_df["成交价"] = pd.to_numeric(temp_df["成交价"], errors="coerce")
temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
temp_df["最新"] = pd.to_numeric(temp_df["最新"], errors="coerce")
temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"], errors="coerce")
return temp_df


@lru_cache()
Expand All @@ -287,7 +263,7 @@ def energy_carbon_gz() -> pd.DataFrame:
"endTime": "2030-09-12",
}
r = requests.get(url, params=params)
temp_df = pd.read_html(r.text, header=0)[1]
temp_df = pd.read_html(StringIO(r.text), header=0)[1]
temp_df.columns = [
"日期",
"品种",
Expand All @@ -300,17 +276,19 @@ def energy_carbon_gz() -> pd.DataFrame:
"成交数量",
"成交金额",
]
temp_df["日期"] = pd.to_datetime(temp_df["日期"], format="%Y%m%d").dt.date
temp_df["开盘价"] = pd.to_numeric(temp_df["开盘价"])
temp_df["收盘价"] = pd.to_numeric(temp_df["收盘价"])
temp_df["最高价"] = pd.to_numeric(temp_df["最高价"])
temp_df["最低价"] = pd.to_numeric(temp_df["最低价"])
temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"])
temp_df["日期"] = pd.to_datetime(
temp_df["日期"], format="%Y%m%d", errors="coerce"
).dt.date
temp_df["开盘价"] = pd.to_numeric(temp_df["开盘价"], errors="coerce")
temp_df["收盘价"] = pd.to_numeric(temp_df["收盘价"], errors="coerce")
temp_df["最高价"] = pd.to_numeric(temp_df["最高价"], errors="coerce")
temp_df["最低价"] = pd.to_numeric(temp_df["最低价"], errors="coerce")
temp_df["涨跌"] = pd.to_numeric(temp_df["涨跌"], errors="coerce")
temp_df["涨跌幅"] = temp_df["涨跌幅"].str.strip("%")
temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"])
temp_df["成交数量"] = pd.to_numeric(temp_df["成交数量"])
temp_df["成交金额"] = pd.to_numeric(temp_df["成交金额"])
temp_df.sort_values("日期", inplace=True)
temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
temp_df["成交数量"] = pd.to_numeric(temp_df["成交数量"], errors="coerce")
temp_df["成交金额"] = pd.to_numeric(temp_df["成交金额"], errors="coerce")
temp_df.sort_values(by="日期", inplace=True)
temp_df.reset_index(inplace=True, drop=True)
return temp_df

Expand Down
47 changes: 22 additions & 25 deletions docs/data/energy/energy.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ print(energy_carbon_eu_df)

目标地址: https://www.hbets.cn/

描述: 湖北碳排放权交易中心-现货交易数据-配额-每日概况
描述: 湖北碳排放权交易中心-碳排放权交易数据

限量: 返回从 2017-04-05 至今的所有历史数据
限量: 返回从 2014-04-02 至今的所有历史数据

输入参数

Expand All @@ -238,17 +238,13 @@ print(energy_carbon_eu_df)

输出参数

| 名称 | 类型 | 描述 |
|------|---------|---------|
| 日期 | object | - |
| 交易品种 | object | - |
| 最新 | float64 | - |
| 涨跌幅 | float64 | 注意单位: % |
| 最高 | float64 | - |
| 最低 | float64 | - |
| 成交量 | float64 | - |
| 成交额 | float64 | - |
| 昨收盘价 | float64 | - |
| 名称 | 类型 | 描述 |
|-----|---------|----|
| 日期 | object | - |
| 成交价 | float64 | - |
| 成交量 | float64 | - |
| 最新 | float64 | - |
| 涨跌 | float64 | - |

接口示例

Expand All @@ -262,18 +258,19 @@ print(energy_carbon_hb_df)
数据示例

```
日期 交易品种 最新 涨跌幅 最高 最低 成交量 成交额 昨收盘价
0 2017-04-05 HBEA 16.55 -0.30 17.90 16.50 9382.0 167152.08 NaN
1 2017-04-06 HBEA 16.55 0.00 16.55 15.50 11126.0 179145.25 16.55
2 2017-04-07 HBEA 16.03 -3.14 17.00 16.01 38449.0 637564.37 16.55
3 2017-04-10 HBEA 16.00 -0.19 16.48 16.00 11418.0 184092.65 16.03
4 2017-04-11 HBEA 15.89 -0.69 16.19 15.51 34554.0 551255.02 16.00
... ... ... ... ... ... ... ... ...
1142 2022-02-28 HBEA 51.48 -0.41 54.00 50.00 14478.0 742153.94 51.69
1143 2022-03-01 HBEA 50.50 -1.90 53.00 49.30 16130.0 811078.70 51.48
1144 2022-03-02 HBEA 50.54 0.08 51.95 47.51 11256.0 564317.92 50.50
1145 2022-03-03 HBEA 49.11 -2.83 51.90 48.01 31508.0 1550420.55 50.54
1146 2022-03-04 HBEA 49.30 0.39 50.50 47.00 11774.0 565137.36 49.11
日期 成交价 成交量 最新 涨跌
0 2014-04-02 21.00 510020.0 0.0 0.0
1 2014-04-03 24.20 51468.0 0.0 0.0
2 2014-04-04 26.61 304125.0 0.0 0.0
3 2014-04-08 26.57 112057.0 0.0 0.0
4 2014-04-09 25.07 77473.0 0.0 0.0
... ... ... ... ... ...
2433 2024-06-03 41.72 1314.0 0.0 0.0
2434 2024-06-04 42.01 3260.0 0.0 0.0
2435 2024-06-05 42.09 7031.0 0.0 0.0
2436 2024-06-06 41.97 3691.0 0.0 0.0
2437 2024-06-07 42.41 17613.0 0.0 0.0
[2438 rows x 5 columns]
```

#### 碳排放权-广州
Expand Down

0 comments on commit 3244d04

Please sign in to comment.