0.9.34: add data client
zengbin93 committed Nov 3, 2023
1 parent 6e73d67 commit 120f892
Showing 3 changed files with 105 additions and 0 deletions.
4 changes: 4 additions & 0 deletions czsc/__init__.py
@@ -73,6 +73,10 @@
empty_cache_path,
print_df_sample,
index_composition,

DataClient,
set_url_token,
get_url_token,
)

# trading calendar utilities
1 change: 1 addition & 0 deletions czsc/utils/__init__.py
@@ -22,6 +22,7 @@
from .signal_analyzer import SignalAnalyzer, SignalPerformance
from .cache import home_path, get_dir_size, empty_cache_path
from .index_composition import index_composition
from .data_client import DataClient, set_url_token, get_url_token


sorted_freqs = ['Tick', '1分钟', '2分钟', '3分钟', '4分钟', '5分钟', '6分钟', '10分钟', '12分钟',
100 changes: 100 additions & 0 deletions czsc/utils/data_client.py
@@ -0,0 +1,100 @@
import os
import hashlib
import requests
import pandas as pd
from time import time
from pathlib import Path
from loguru import logger
from functools import partial


# Logging from this module is silenced by default; call logger.enable("czsc.utils.data_client") to turn it on.
logger.disable(__name__)


def set_url_token(token, url):
    """Save the access token for the given data-API URL; usually this only needs to be done once per machine.

    :param token: access token
    :param url: data-API base URL
    """
    hash_key = hashlib.md5(str(url).encode('utf-8')).hexdigest()
    file_token = Path("~").expanduser() / f"{hash_key}.txt"
    with open(file_token, 'w', encoding='utf-8') as f:
        f.write(token)
    logger.info(f"Access token for {url} saved to {file_token}")


def get_url_token(url):
    """Read the stored access token for the given data-API URL, or return None if it has not been set."""
    hash_key = hashlib.md5(str(url).encode('utf-8')).hexdigest()
    file_token = Path("~").expanduser() / f"{hash_key}.txt"
    if file_token.exists():
        return file_token.read_text(encoding='utf-8')
    logger.warning(f"Please set the access token for {url}; contact the administrator to apply for one if needed")
    return None


class DataClient:
    def __init__(self, token=None, url='http://api.tushare.pro', timeout=30, **kwargs):
        """Data-API client with local caching (default cache path: ~/.quant_data_cache); compatible with the Tushare API.

        :param token: str, API token used for authentication
        :param url: str, API base URL
        :param timeout: int, request timeout in seconds
        :param kwargs: dict, extra options

            - clear_cache: bool, clear the cache on startup
            - cache_path: str, cache directory
        """
        self.__token = token or get_url_token(url)
        self.__http_url = url
        self.__timeout = timeout
        assert self.__token, "Please set the czsc_token credential; contact the administrator to apply for one if needed"
        self.cache_path = Path(kwargs.get("cache_path", os.path.expanduser("~/.quant_data_cache")))
        self.cache_path.mkdir(exist_ok=True, parents=True)
        logger.info(f"Data cache path: {self.cache_path}")
        if kwargs.get("clear_cache", False):
            self.clear_cache()

    def clear_cache(self):
        """Delete all cached query results."""
        for file in self.cache_path.glob("*.pkl"):
            file.unlink()
        logger.info(f"Data cache under {self.cache_path} has been cleared")

    def post_request(self, api_name, fields='', **kwargs):
        """Run an API query.

        :param api_name: str, name of the API endpoint
        :param fields: str, fields to return
        :param kwargs: dict, query parameters
        :return: pd.DataFrame
        """
        stime = time()
        # Guard against pickle probing these names through __getattr__.
        if api_name in ['__getstate__', '__setstate__']:
            return pd.DataFrame()

        req_params = {'api_name': api_name, 'token': self.__token, 'params': kwargs, 'fields': fields}
        hash_key = hashlib.md5(str(req_params).encode('utf-8')).hexdigest()
        file_cache = self.cache_path / f"{hash_key}.pkl"
        if file_cache.exists():
            df = pd.read_pickle(file_cache)
            logger.info(f"Loaded data from cache, size: {(file_cache.stat().st_size / 1048576):.2f}MB")
            return df

        res = requests.post(self.__http_url, json=req_params, timeout=self.__timeout)
        if res:  # a requests.Response is truthy for 2xx/3xx status codes
            result = res.json()
            if result['code'] != 0:
                raise Exception(result['msg'])
            df = pd.DataFrame(result['data']['items'], columns=result['data']['fields'])
            df.to_pickle(file_cache)
        else:
            df = pd.DataFrame()

        logger.info(f"Query took {time() - stime:.2f}s; API: {api_name}; params: {kwargs}; shape: {df.shape}")
        return df

    def __getattr__(self, name):
        # Any unknown attribute becomes an API call, e.g. client.daily(...) -> post_request('daily', ...)
        return partial(self.post_request, name)
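
For reference, a minimal usage sketch of the new client. The endpoint name daily and its parameters follow Tushare's conventions and are illustrative, and "your_token_here" is a placeholder; any attribute not defined on the client is proxied to post_request via __getattr__, and repeated calls with the same parameters are served from the pickle cache.

    from czsc import DataClient, set_url_token

    # one-time setup per machine: stores the token under the home directory
    set_url_token(token="your_token_here", url="http://api.tushare.pro")

    dc = DataClient(url="http://api.tushare.pro", timeout=30)
    # resolved via __getattr__ -> post_request('daily', ...); cached as a .pkl on first call
    df = dc.daily(ts_code="000001.SZ", start_date="20230101", end_date="20231103")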
