Skip to content

Commit

Permalink
V0.9.41 更新一批代码 (#185)
Browse files Browse the repository at this point in the history
* 0.9.41 新增 disk cache 功能

* 0.9.41 update

* update

* 0.9.41 fix test

* update readme

* 0.9.41 优化 streamlit 组件

* 0.9.41 新增 weekly 绩效评价

* 0.9.41 fix test

* 0.9.41 新增自相关分析组件

* 0.9.41 update

* 0.9.41 update

* 0.9.41 fix test

* 0.9.41 优化 streamlit 组件
  • Loading branch information
zengbin93 authored Jan 21, 2024
1 parent 912aeb7 commit f7ba018
Show file tree
Hide file tree
Showing 15 changed files with 558 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.40 ]
branches: [ master, V0.9.41 ]
pull_request:
branches: [ master ]

Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* 已经开始用czsc库进行量化研究的朋友,欢迎[加入飞书群](https://applink.feishu.cn/client/chat/chatter/add_by_link?link_token=0bak668e-7617-452c-b935-94d2c209e6cf),快点击加入吧!
* [B站视频教程合集(持续更新...)](https://space.bilibili.com/243682308/channel/series)
* [CZSC策略圈介绍](https://s0cqcxuy3p.feishu.cn/wiki/D12bwh4SriW1Lgk23HUchFKFnpe)
* [CZSC小圈子](https://s0cqcxuy3p.feishu.cn/wiki/wikcnwXSk9mWnki1b6URPhLA2Hc)
* [CZSC代码库QA](https://zbczsc.streamlit.app/)


Expand All @@ -33,7 +33,6 @@
* 基于 Tushare 数据的择时、选股策略回测研究流程



## 安装使用

**注意:** python 版本必须大于等于 3.7
Expand All @@ -43,6 +42,11 @@
pip install [email protected]:waditu/czsc.git -U
```

直接从github指定分支安装最新版:
```
pip install git+https://github.com/waditu/[email protected] -U
```

`pypi`安装:
```
pip install czsc -U -i https://pypi.python.org/simple
Expand Down
11 changes: 9 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@
SignalAnalyzer,
SignalPerformance,
daily_performance,
weekly_performance,
net_value_stats,
subtract_fee,

home_path,
DiskCache,
disk_cache,
get_dir_size,
empty_cache_path,
print_df_sample,
Expand All @@ -94,13 +98,16 @@
# streamlit 量化分析组件
from czsc.utils.st_components import (
show_daily_return,
show_splited_daily,
show_monthly_return,
show_correlation,
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
show_weight_backtest,
show_ts_rolling_corr,
show_ts_self_corr,
)

from czsc.utils.bi_info import (
Expand All @@ -119,10 +126,10 @@
find_most_similarity,
)

__version__ = "0.9.40"
__version__ = "0.9.41"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231218"
__date__ = "20240114"


def welcome():
Expand Down
8 changes: 5 additions & 3 deletions czsc/connectors/cooperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
raise ValueError(f"symbol {symbol} 无法识别,获取数据失败!")


def stocks_daily_klines(years=None, **kwargs):
@czsc.disk_cache(path=cache_path, ttl=-1)
def stocks_daily_klines(sdt='20170101', edt="20240101", **kwargs):
"""获取全市场A股的日线数据"""
adj = kwargs.get('adj', 'hfq')
if years is None:
years = ['2017', '2018', '2019', '2020', '2021', '2022', '2023']
sdt = pd.to_datetime(sdt).year
edt = pd.to_datetime(edt).year
years = [str(year) for year in range(sdt, edt + 1)]

res = []
for year in years:
Expand Down
4 changes: 2 additions & 2 deletions czsc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from .plotly_plot import KlineChart
from .trade import cal_trade_price, update_nbars, update_bbars, update_tbars, risk_free_returns, resample_to_daily
from .cross import CrossSectionalPerformance, cross_sectional_ranker
from .stats import daily_performance, net_value_stats, subtract_fee
from .stats import daily_performance, net_value_stats, subtract_fee, weekly_performance
from .signal_analyzer import SignalAnalyzer, SignalPerformance
from .cache import home_path, get_dir_size, empty_cache_path
from .cache import home_path, get_dir_size, empty_cache_path, DiskCache, disk_cache
from .index_composition import index_composition
from .data_client import DataClient, set_url_token, get_url_token
from .oss import AliyunOSS
Expand Down
149 changes: 147 additions & 2 deletions czsc/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,19 @@
"""

import os
import time
import dill
import shutil
import hashlib
import json
import pandas as pd
from pathlib import Path
from loguru import logger
from typing import Any

home_path = os.environ.get("CZSC_HOME", os.path.join(os.path.expanduser("~"), '.czsc'))
os.makedirs(home_path, exist_ok=True)

home_path = Path(os.environ.get("CZSC_HOME", os.path.join(os.path.expanduser("~"), '.czsc')))
home_path.mkdir(parents=True, exist_ok=True)


def get_dir_size(path):
Expand All @@ -28,3 +37,139 @@ def empty_cache_path():
shutil.rmtree(home_path)
os.makedirs(home_path, exist_ok=False)
print(f"已清空缓存文件夹:{home_path}")


class DiskCache:
    """File-based key/value cache.

    Each entry is stored as a single file named ``{k}.{suffix}`` inside
    ``self.path``; the suffix selects the serialization format:
    pkl (via dill), json, txt, csv, xlsx.
    """

    def __init__(self, path=None):
        """Create the cache directory if it does not exist.

        :param path: cache directory; defaults to ``home_path / "disk_cache"``
        :raises ValueError: if ``path`` exists and is a regular file
        """
        self.path = home_path / "disk_cache" if path is None else Path(path)
        if self.path.is_file():
            # a regular file at this location would shadow the cache directory
            raise ValueError(f"path {self.path} already exists and is a file, not a directory")

        self.path.mkdir(parents=True, exist_ok=True)

    def __str__(self) -> str:
        return "DiskCache: " + str(self.path)

    def is_found(self, k: str, suffix: str = "pkl", ttl=-1) -> bool:
        """Check whether a cache entry exists and is still valid.

        :param k: cache key (file name without suffix)
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :param ttl: time-to-live in seconds; -1 (or any non-positive value)
            means the entry never expires
        :return: True if the cache file exists and has not expired
        """
        file = self.path / f"{k}.{suffix}"
        if not file.exists():
            logger.info(f"文件不存在, {file}")
            return False

        if ttl > 0:
            # NOTE(review): st_ctime is creation time on Windows but metadata
            # change time on Linux, so a chmod/rename can refresh the TTL there.
            create_time = file.stat().st_ctime
            if (time.time() - create_time) > ttl:
                logger.info(f"缓存文件已过期, {file}")
                return False

        # existence was already verified above; no need to stat the file again
        return True

    def get(self, k: str, suffix: str = "pkl") -> Any:
        """Read a cached value.

        :param k: cache key (file name without suffix)
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :return: the deserialized value, or None if the file does not exist
        :raises ValueError: if ``suffix`` is not supported
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"正在读取缓存记录,地址:{file}")
        if not file.exists():
            logger.warning(f"文件不存在, {file}")
            return None

        if suffix == "pkl":
            # context manager closes the handle (the original leaked it)
            with open(file, 'rb') as fp:
                res = dill.load(fp)
        elif suffix == "json":
            with open(file, 'r', encoding='utf-8') as fp:
                res = json.load(fp)
        elif suffix == "txt":
            res = file.read_text(encoding='utf-8')
        elif suffix == "csv":
            res = pd.read_csv(file, encoding='utf-8')
        elif suffix == "xlsx":
            res = pd.read_excel(file)
        else:
            raise ValueError(f"suffix {suffix} not supported")
        return res

    def set(self, k: str, v: Any, suffix: str = "pkl"):
        """Write a value to the cache, overwriting any existing entry.

        :param k: cache key (file name without suffix)
        :param v: value to store; json requires dict, txt requires str,
            csv/xlsx require pd.DataFrame, pkl accepts anything dill can dump
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :raises ValueError: if ``suffix`` is not supported or ``v`` has the
            wrong type for the chosen format
        """
        file = self.path / f"{k}.{suffix}"
        if file.exists():
            logger.info(f"缓存文件 {file} 将被覆盖")

        if suffix == "pkl":
            # context manager guarantees the bytes are flushed and closed
            with open(file, 'wb') as fp:
                dill.dump(v, fp)

        elif suffix == "json":
            if not isinstance(v, dict):
                raise ValueError("suffix json only support dict")
            with open(file, 'w', encoding='utf-8') as fp:
                json.dump(v, fp, ensure_ascii=False, indent=4)

        elif suffix == "txt":
            if not isinstance(v, str):
                raise ValueError("suffix txt only support str")
            file.write_text(v, encoding='utf-8')

        elif suffix == "csv":
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix csv only support pd.DataFrame")
            v.to_csv(file, index=False, encoding='utf-8')

        elif suffix == 'xlsx':
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix xlsx only support pd.DataFrame")
            v.to_excel(file, index=False)

        else:
            raise ValueError(f"suffix {suffix} not supported")

        logger.info(f"已写入缓存文件:{file}")

    def remove(self, k: str, suffix: str = "pkl"):
        """Delete a cache entry if it exists; a missing entry is a no-op.

        :param k: cache key (file name without suffix)
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"准备删除缓存文件:{file}")
        if file.exists():
            file.unlink()


def disk_cache(path: str, suffix: str = "pkl", ttl: int = -1):
    """Disk-cache decorator supporting several serialization formats.

    Results are stored under ``{path}/{func.__name__}`` and keyed by the
    first 8 hex chars of the MD5 digest of the function name plus the repr
    of its arguments.

    :param path: root directory for the cache
    :param suffix: file format, one of pkl, json, txt, csv, xlsx
    :param ttl: time-to-live in seconds; -1 means cached values never expire
    """
    assert suffix in ["pkl", "json", "txt", "csv", "xlsx"], "suffix not supported"
    from functools import wraps  # local import keeps the block self-contained

    def decorator(func):
        # one sub-directory per decorated function
        _c = DiskCache(path=Path(path) / func.__name__)

        @wraps(func)  # preserve __name__/__doc__ of the wrapped function
        def cached_func(*args, **kwargs):
            # NOTE(review): the key relies on repr(args)/repr(kwargs);
            # arguments without a stable repr may not hash consistently.
            hash_str = f"{func.__name__}{args}{kwargs}"
            k = hashlib.md5(hash_str.encode('utf-8')).hexdigest().upper()[:8]
            k = f"{k}_{func.__name__}"

            if _c.is_found(k, suffix=suffix, ttl=ttl):
                return _c.get(k, suffix=suffix)

            output = func(*args, **kwargs)
            _c.set(k, output, suffix=suffix)
            return output

        return cached_func

    return decorator
Loading

0 comments on commit f7ba018

Please sign in to comment.