diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 5765b546e..92d00c571 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -5,7 +5,7 @@ name: Python package on: push: - branches: [ master, V0.9.40 ] + branches: [ master, V0.9.41 ] pull_request: branches: [ master ] diff --git a/README.md b/README.md index 2528c64e8..799014a87 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ * 已经开始用czsc库进行量化研究的朋友,欢迎[加入飞书群](https://applink.feishu.cn/client/chat/chatter/add_by_link?link_token=0bak668e-7617-452c-b935-94d2c209e6cf),快点击加入吧! * [B站视频教程合集(持续更新...)](https://space.bilibili.com/243682308/channel/series) -* [CZSC策略圈介绍](https://s0cqcxuy3p.feishu.cn/wiki/D12bwh4SriW1Lgk23HUchFKFnpe) +* [CZSC小圈子](https://s0cqcxuy3p.feishu.cn/wiki/wikcnwXSk9mWnki1b6URPhLA2Hc) * [CZSC代码库QA](https://zbczsc.streamlit.app/) @@ -33,7 +33,6 @@ * 基于 Tushare 数据的择时、选股策略回测研究流程 - ## 安装使用 **注意:** python 版本必须大于等于 3.7 @@ -43,6 +42,11 @@ pip install git@github.com:waditu/czsc.git -U ``` +直接从github指定分支安装最新版: +``` +pip install git+https://github.com/waditu/czsc.git@V0.9.41 -U +``` + 从`pypi`安装: ``` pip install czsc -U -i https://pypi.python.org/simple diff --git a/czsc/__init__.py b/czsc/__init__.py index 8959cb8ed..204cd49aa 100644 --- a/czsc/__init__.py +++ b/czsc/__init__.py @@ -69,9 +69,13 @@ SignalAnalyzer, SignalPerformance, daily_performance, + weekly_performance, net_value_stats, subtract_fee, + home_path, + DiskCache, + disk_cache, get_dir_size, empty_cache_path, print_df_sample, @@ -94,6 +98,7 @@ # streamlit 量化分析组件 from czsc.utils.st_components import ( show_daily_return, + show_splited_daily, show_monthly_return, show_correlation, show_sectional_ic, @@ -101,6 +106,8 @@ show_factor_layering, show_symbol_factor_layering, show_weight_backtest, + show_ts_rolling_corr, + show_ts_self_corr, ) from czsc.utils.bi_info import ( @@ -119,10 +126,10 @@ find_most_similarity, ) -__version__ = "0.9.40" +__version__ = "0.9.41" 
__author__ = "zengbin93" __email__ = "zeng_bin8888@163.com" -__date__ = "20231218" +__date__ = "20240114" def welcome(): diff --git a/czsc/connectors/cooperation.py b/czsc/connectors/cooperation.py index d6840523d..84e924715 100644 --- a/czsc/connectors/cooperation.py +++ b/czsc/connectors/cooperation.py @@ -111,11 +111,13 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs): raise ValueError(f"symbol {symbol} 无法识别,获取数据失败!") -def stocks_daily_klines(years=None, **kwargs): +@czsc.disk_cache(path=cache_path, ttl=-1) +def stocks_daily_klines(sdt='20170101', edt="20240101", **kwargs): """获取全市场A股的日线数据""" adj = kwargs.get('adj', 'hfq') - if years is None: - years = ['2017', '2018', '2019', '2020', '2021', '2022', '2023'] + sdt = pd.to_datetime(sdt).year + edt = pd.to_datetime(edt).year + years = [str(year) for year in range(sdt, edt + 1)] res = [] for year in years: diff --git a/czsc/utils/__init__.py b/czsc/utils/__init__.py index 2d4419cb7..baf2140a6 100644 --- a/czsc/utils/__init__.py +++ b/czsc/utils/__init__.py @@ -18,9 +18,9 @@ from .plotly_plot import KlineChart from .trade import cal_trade_price, update_nbars, update_bbars, update_tbars, risk_free_returns, resample_to_daily from .cross import CrossSectionalPerformance, cross_sectional_ranker -from .stats import daily_performance, net_value_stats, subtract_fee +from .stats import daily_performance, net_value_stats, subtract_fee, weekly_performance from .signal_analyzer import SignalAnalyzer, SignalPerformance -from .cache import home_path, get_dir_size, empty_cache_path +from .cache import home_path, get_dir_size, empty_cache_path, DiskCache, disk_cache from .index_composition import index_composition from .data_client import DataClient, set_url_token, get_url_token from .oss import AliyunOSS diff --git a/czsc/utils/cache.py b/czsc/utils/cache.py index c1e6862f8..a2b6fe4a4 100644 --- a/czsc/utils/cache.py +++ b/czsc/utils/cache.py @@ -6,10 +6,19 @@ """ import os +import time +import dill import shutil 
class DiskCache:
    """Directory-backed cache that stores every entry as ``<path>/<k>.<suffix>``.

    The suffix selects the serialisation format:

    - ``pkl``: any object, serialised with dill
    - ``json``: ``dict`` only
    - ``txt``: ``str`` only
    - ``csv`` / ``xlsx``: ``pandas.DataFrame`` only (the index is not stored)
    """

    def __init__(self, path=None):
        """Create the cache directory if it does not exist yet.

        :param path: cache directory; ``home_path / "disk_cache"`` when None
        :raises Exception: when ``path`` points at an existing regular file
        """
        self.path = home_path / "disk_cache" if path is None else Path(path)
        if self.path.is_file():
            raise Exception("path has exist")

        self.path.mkdir(parents=True, exist_ok=True)

    def __str__(self) -> str:
        return "DiskCache: " + str(self.path)

    def is_found(self, k: str, suffix: str = "pkl", ttl=-1) -> bool:
        """Tell whether a usable (existing and not expired) cache file exists.

        :param k: cache key, used as the file stem
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :param ttl: time to live in seconds; values <= 0 mean "never expires"
        :return: True when the entry exists and is still valid
        """
        file = self.path / f"{k}.{suffix}"
        if not file.exists():
            logger.info(f"文件不存在, {file}")
            return False

        if ttl > 0:
            # Age is measured from the last write (st_mtime). st_ctime is the
            # creation time on Windows and is not refreshed when the entry is
            # overwritten, so an expired entry would stay expired forever.
            age = time.time() - file.stat().st_mtime
            if age > ttl:
                logger.info(f"缓存文件已过期, {file}")
                return False

        return True

    def get(self, k: str, suffix: str = "pkl") -> Any:
        """Read one cache entry.

        :param k: cache key, used as the file stem
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :return: the cached content, or None when the file does not exist
        :raises ValueError: when ``suffix`` is not supported
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"正在读取缓存记录,地址:{file}")
        if not file.exists():
            logger.warning(f"文件不存在, {file}")
            return None

        if suffix == "pkl":
            # NOTE(security): dill, like pickle, executes arbitrary code while
            # loading; only point the cache at directories you trust.
            with open(file, 'rb') as f:
                res = dill.load(f)
        elif suffix == "json":
            with open(file, 'r', encoding='utf-8') as f:
                res = json.load(f)
        elif suffix == "txt":
            res = file.read_text(encoding='utf-8')
        elif suffix == "csv":
            res = pd.read_csv(file, encoding='utf-8')
        elif suffix == "xlsx":
            res = pd.read_excel(file)
        else:
            raise ValueError(f"suffix {suffix} not supported")
        return res

    def set(self, k: str, v: Any, suffix: str = "pkl"):
        """Write one cache entry, overwriting any previous file for the key.

        :param k: cache key, used as the file stem
        :param v: content to store; its type must match ``suffix``
        :param suffix: file suffix, one of pkl, json, txt, csv, xlsx
        :raises ValueError: when ``suffix`` is not supported or ``v`` has the
            wrong type for the chosen suffix
        """
        file = self.path / f"{k}.{suffix}"
        if file.exists():
            logger.info(f"缓存文件 {file} 将被覆盖")

        if suffix == "pkl":
            with open(file, 'wb') as f:
                dill.dump(v, f)

        elif suffix == "json":
            if not isinstance(v, dict):
                raise ValueError("suffix json only support dict")
            with open(file, 'w', encoding='utf-8') as f:
                json.dump(v, f, ensure_ascii=False, indent=4)

        elif suffix == "txt":
            if not isinstance(v, str):
                raise ValueError("suffix txt only support str")
            file.write_text(v, encoding='utf-8')

        elif suffix == "csv":
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix csv only support pd.DataFrame")
            v.to_csv(file, index=False, encoding='utf-8')

        elif suffix == 'xlsx':
            if not isinstance(v, pd.DataFrame):
                raise ValueError("suffix xlsx only support pd.DataFrame")
            v.to_excel(file, index=False)

        else:
            raise ValueError(f"suffix {suffix} not supported")

        logger.info(f"已写入缓存文件:{file}")

    def remove(self, k: str, suffix: str = "pkl"):
        """Delete one cache entry; a missing file is silently ignored.

        :param k: cache key, used as the file stem
        :param suffix: file suffix of the entry
        """
        file = self.path / f"{k}.{suffix}"
        logger.info(f"准备删除缓存文件:{file}")
        if file.exists():
            file.unlink()


def disk_cache(path: str, suffix: str = "pkl", ttl: int = -1):
    """Decorator factory that caches a function's return value on disk.

    Entries are stored under ``<path>/<function name>/``. The key is built
    from the function name and the ``str()`` of the positional and keyword
    arguments, hashed with md5 and truncated to 8 hex characters. Arguments
    must therefore have a stable string representation, and the same keyword
    arguments passed in a different order produce a different key.

    :param path: root directory of the cache
    :param suffix: file suffix, one of pkl, json, txt, csv, xlsx; the wrapped
        function must return a type that suffix can store
    :param ttl: time to live in seconds; values <= 0 mean "never expires"
    :return: a decorator
    """
    from functools import wraps  # local import keeps this block self-contained

    assert suffix in ["pkl", "json", "txt", "csv", "xlsx"], "suffix not supported"

    def decorator(func):
        _c = DiskCache(path=Path(path) / func.__name__)

        @wraps(func)  # keep __name__ / __doc__ of the wrapped function
        def cached_func(*args, **kwargs):
            hash_str = f"{func.__name__}{args}{kwargs}"
            k = hashlib.md5(hash_str.encode('utf-8')).hexdigest().upper()[:8]
            k = f"{k}_{func.__name__}"

            if _c.is_found(k, suffix=suffix, ttl=ttl):
                return _c.get(k, suffix=suffix)

            output = func(*args, **kwargs)
            _c.set(k, output, suffix=suffix)
            return output

        return cached_func

    return decorator
stats.background_gradient(cmap='RdYlGn_r', axis=None, subset=['日胜率']) + stats = stats.background_gradient(cmap='RdYlGn_r', axis=None, subset=['非零覆盖']) + stats = stats.background_gradient(cmap='RdYlGn', axis=None, subset=['新高间隔']) + stats = stats.background_gradient(cmap='RdYlGn_r', axis=None, subset=['新高占比']) + + stats = stats.format( + { + '盈亏平衡点': '{:.2f}', + '年化波动率': '{:.2%}', + '最大回撤': '{:.2%}', + '卡玛': '{:.2f}', + '年化': '{:.2%}', + '夏普': '{:.2f}', + '非零覆盖': '{:.2%}', + '日胜率': '{:.2%}', + '新高间隔': '{:.2f}', + '新高占比': '{:.2%}', + } + ) return stats with st.container(): @@ -42,15 +76,12 @@ def _stats(df_, type_='持有日'): st.subheader(title) st.divider() + st.write("交易日绩效指标") + st.dataframe(_stats(df, type_='交易日'), use_container_width=True) + if kwargs.get("stat_hold_days", True): - col1, col2 = st.columns([1, 1]) - col1.write("交易日绩效指标") - col1.dataframe(_stats(df, type_='交易日'), use_container_width=True) - col2.write("持有日绩效指标") - col2.dataframe(_stats(df, type_='持有日'), use_container_width=True) - else: - st.write("绩效指标") - st.dataframe(_stats(df, type_='交易日'), use_container_width=True) + st.write("持有日绩效指标") + st.dataframe(_stats(df, type_='持有日'), use_container_width=True) df = df.cumsum() fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线") @@ -68,9 +99,24 @@ def _stats(df_, type_='持有日'): def show_monthly_return(df, ret_col='total', title="月度累计收益", **kwargs): - """展示指定列的月度累计收益""" - assert df.index.dtype == 'datetime64[ns]', "index 必须是 datetime 类型" - st.subheader(title, divider="rainbow") + """展示指定列的月度累计收益 + + :param df: pd.DataFrame,数据源 + :param ret_col: str,收益列名 + :param title: str,标题 + :param kwargs: + """ + if not df.index.dtype == 'datetime64[ns]': + df['dt'] = pd.to_datetime(df['dt']) + df.set_index('dt', inplace=True) + + assert df.index.dtype == 'datetime64[ns]', "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换" + df = df.copy().fillna(0) + df.sort_index(inplace=True, ascending=True) + + if title: + st.subheader(title, divider="rainbow") + monthly = 
df[[ret_col]].resample('M').sum() monthly['year'] = monthly.index.year monthly['month'] = monthly.index.month @@ -78,7 +124,11 @@ def show_monthly_return(df, ret_col='total', title="月度累计收益", **kwarg month_cols = [f"{x}月" for x in monthly.columns] monthly.columns = month_cols monthly['年收益'] = monthly.sum(axis=1) - monthly = monthly.style.background_gradient(cmap='RdYlGn_r', axis=None, subset=month_cols).format('{:.2%}', na_rep='-') + + monthly = monthly.style.background_gradient(cmap='RdYlGn_r', axis=None, subset=month_cols) + monthly = monthly.background_gradient(cmap='RdYlGn_r', axis=None, subset=['年收益']) + monthly = monthly.format('{:.2%}', na_rep='-') + st.dataframe(monthly, use_container_width=True) @@ -119,8 +169,10 @@ def show_sectional_ic(df, x_col, y_col, method='pearson', **kwargs): dfm = pd.pivot_table(dfm, index='year', columns='month', values='ic') col4.write("月度IC分析结果:") - col4.dataframe(dfm.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS'), - use_container_width=True) + col4.dataframe( + dfm.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS'), + use_container_width=True, + ) if kwargs.get("show_factor_histgram", False): fig = px.histogram(df, x=x_col, marginal="box", title="因子数据分布图") @@ -170,7 +222,7 @@ def show_factor_layering(df, x_col, y_col='n1b', **kwargs): """ n = kwargs.get("n", 10) - if df[y_col].max() > 100: # 收益率单位为BP, 转换为万分之一 + if df[y_col].max() > 100: # 收益率单位为BP, 转换为万分之一 df[y_col] = df[y_col] / 10000 df = czsc.feture_cross_layering(df, x_col, n=n) @@ -225,7 +277,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs): """ df = df.copy() n = kwargs.get("n", 10) - if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一 + if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一 df[y_col] = df[y_col] / 10000 if f'{x_col}分层' not in df.columns: @@ -310,9 +362,206 @@ def show_weight_backtest(dfw, **kwargs): dret.index = pd.to_datetime(dret.index) show_daily_return(dret, 
def _title_anchor(title):
    """Return a short, stable anchor id derived from a section title."""
    return hashlib.md5(title.encode('utf-8')).hexdigest()[:8]


def _windowed_corr(s1, s2, window, min_periods, corr_method):
    """Compute a rolling / expanding correlation with an arbitrary method.

    pandas' ``rolling().corr`` / ``expanding().corr`` only compute Pearson, so
    other methods are evaluated window by window with ``Series.corr``. This
    costs one ``Series.corr`` call per row.

    :param s1: pd.Series
    :param s2: pd.Series of the same length, row-aligned with ``s1``
    :param window: int window length, or None for an expanding window
    :param min_periods: minimum observations per window; None means 1 for an
        expanding window and ``window`` for a rolling one
    :param corr_method: any ``method`` accepted by ``Series.corr``
    :return: pd.Series indexed like ``s1``; NaN where the window is too short
    """
    if min_periods is None:
        min_periods = 1 if window is None else window
    # A correlation needs at least two observations.
    required = max(int(min_periods), 2)

    # Positional copies so that duplicated index labels cannot mis-align rows.
    a = s1.reset_index(drop=True)
    b = s2.reset_index(drop=True)

    values = np.full(len(a), np.nan)
    for end in range(1, len(a) + 1):
        start = 0 if window is None else max(0, end - window)
        if end - start < required:
            continue
        values[end - 1] = a.iloc[start:end].corr(b.iloc[start:end], method=corr_method)
    return pd.Series(values, index=s1.index)


def show_splited_daily(df, ret_col, **kwargs):
    """Show the performance of a daily-return series over trailing periods.

    The periods are the last 1 / 2 weeks, 1 / 3 / 6 months, 1 year and
    year-to-date, all measured back from the last date in ``df``.

    :param df: pd.DataFrame with a datetime index, or with a ``dt`` column
        that can be converted by ``pd.to_datetime``; it is not modified
    :param ret_col: str, name of the return column in ``df``
    :param kwargs:

        - sub_title: str, optional sub header shown above the table
    """
    # Work on a copy so the caller's frame is never mutated.
    df = df.copy()
    if not df.index.dtype == 'datetime64[ns]':
        df['dt'] = pd.to_datetime(df['dt'])
        df.set_index('dt', inplace=True)

    assert df.index.dtype == 'datetime64[ns]', "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
    df = df.fillna(0).sort_index(ascending=True)

    sub_title = kwargs.get("sub_title", "")
    if sub_title:
        st.subheader(sub_title, divider="rainbow")

    if df.empty:
        # Without rows there is no "last date" to anchor the periods on.
        st.warning("数据为空,无法展示分段日收益表现")
        return

    last_dt = df.index[-1]
    sdt_map = {
        "过去1周": last_dt - pd.Timedelta(days=7),
        "过去2周": last_dt - pd.Timedelta(days=14),
        "过去1月": last_dt - pd.Timedelta(days=30),
        "过去3月": last_dt - pd.Timedelta(days=90),
        "过去6月": last_dt - pd.Timedelta(days=180),
        "过去1年": last_dt - pd.Timedelta(days=365),
        "今年以来": pd.to_datetime(f"{last_dt.year}-01-01"),
    }

    rows = []
    for name, sdt in sdt_map.items():
        period = df.loc[sdt:last_dt]
        row = czsc.daily_performance(period[ret_col])
        row['开始日期'] = sdt.strftime('%Y-%m-%d')
        row['结束日期'] = last_dt.strftime('%Y-%m-%d')
        row['收益名称'] = name
        row['绝对收益'] = period[ret_col].sum()
        rows.append(row)

    dfv = pd.DataFrame(rows).set_index('收益名称')
    cols = ['开始日期', '结束日期', '绝对收益', '年化', '夏普', '最大回撤', '卡玛', '年化波动率', '非零覆盖', '日胜率', '盈亏平衡点']
    dfv = dfv[cols].copy()

    # 'RdYlGn_r' paints larger values red, 'RdYlGn' paints smaller values red.
    red_when_high = ['绝对收益', '年化', '夏普', '卡玛', '日胜率', '非零覆盖']
    red_when_low = ['最大回撤', '年化波动率', '盈亏平衡点']
    styled = dfv.style
    for column in red_when_high:
        styled = styled.background_gradient(cmap='RdYlGn_r', subset=[column])
    for column in red_when_low:
        styled = styled.background_gradient(cmap='RdYlGn', subset=[column])

    styled = styled.format(
        {
            '盈亏平衡点': '{:.2f}',
            '年化波动率': '{:.2%}',
            '最大回撤': '{:.2%}',
            '卡玛': '{:.2f}',
            '年化': '{:.2%}',
            '夏普': '{:.2f}',
            '非零覆盖': '{:.2%}',
            '日胜率': '{:.2%}',
            '绝对收益': '{:.2%}',
        }
    )
    st.dataframe(styled, use_container_width=True)


def show_ts_rolling_corr(df, col1, col2, **kwargs):
    """Plot the rolling (or expanding) correlation of two columns over time.

    :param df: pd.DataFrame with a DatetimeIndex, or with a ``dt`` column;
        must contain ``col1`` and ``col2``; it is not modified
    :param col1: str, column name in ``df``
    :param col2: str, column name in ``df``
    :param kwargs:

        - min_periods: int, minimum number of observations in a window
        - window: int, window length; 0 / None selects an expanding window
        - corr_method: str, one of pearson, kendall, spearman
        - sub_title: str, optional sub header
    """
    if col1 not in df.columns or col2 not in df.columns:
        st.error(f"列 {col1} 或 {col2} 不存在,请重新输入")
        return

    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.copy()  # keep the caller's frame untouched
        df['dt'] = pd.to_datetime(df['dt'])
        df = df.set_index('dt')

    df = df[[col1, col2]].copy()
    if df.isnull().sum().sum() > 0:
        st.dataframe(df[df.isnull().sum(axis=1) > 0])
        st.error(f"列 {col1} 或 {col2} 中存在缺失值,请先处理缺失值")
        return

    sub_title = kwargs.get('sub_title', None)
    if sub_title:
        st.subheader(sub_title, divider="rainbow", anchor=_title_anchor(sub_title))

    min_periods = kwargs.get('min_periods', None)
    window = kwargs.get('window', None)
    corr_method = kwargs.get('corr_method', 'pearson')

    use_expanding = not window or window <= 0
    method = 'expanding' if use_expanding else 'rolling'
    if not use_expanding and min_periods is not None and min_periods > window:
        # pandas rejects min_periods > window; a window can never hold more
        # observations than its own length, so clamp instead of crashing.
        min_periods = window

    if corr_method in ('kendall', 'spearman'):
        # The built-in window correlation is Pearson only; honour the
        # requested method explicitly.
        corr_result = _windowed_corr(
            df[col1], df[col2], None if use_expanding else window, min_periods, corr_method
        )
    elif use_expanding:
        corr_result = df[col1].expanding(min_periods=min_periods).corr(df[col2], pairwise=True)
    else:
        corr_result = df[col1].rolling(window=window, min_periods=min_periods).corr(df[col2], pairwise=True)

    corr_result = corr_result.dropna().rename('corr')
    line = go.Scatter(x=corr_result.index, y=corr_result, mode='lines', name='corr')
    layout = go.Layout(
        title=f'滑动({method})相关系数',
        xaxis=dict(title=''),
        yaxis=dict(title='corr'),
        annotations=[
            dict(
                x=0.0,
                y=1.05,
                showarrow=False,
                xref="paper",
                yref="paper",
                font=dict(size=12),
                text=f"滑动窗口长度:{window},最小滑动窗口长度:{min_periods},相关系数计算方法:{corr_method}",
            )
        ],
    )
    fig = go.Figure(data=[line], layout=layout)
    st.plotly_chart(fig, use_container_width=True)


def show_ts_self_corr(df, col, **kwargs):
    """Show the autocorrelation analysis of one time series (contributor: guo).

    The left column plots ACF / PACF with 95% confidence bounds; the right
    column plots the rolling correlation between the series and its own
    N-period lag.

    :param df: pd.DataFrame with a DatetimeIndex, or with a ``dt`` column;
        must contain ``col``; it is not modified
    :param col: str, column name in ``df``
    """
    df = df.copy()  # keep the caller's frame untouched
    if not isinstance(df.index, pd.DatetimeIndex):
        df['dt'] = pd.to_datetime(df['dt'])
        df = df.set_index('dt')
    df = df.sort_index(ascending=True)

    if df[col].isnull().sum() > 0:
        st.dataframe(df[df[col].isnull()])
        st.error(f"列 {col} 中存在缺失值,请先处理缺失值")
        return

    left, right = st.columns(2)

    with left:
        sub_title = f"自相关系数分析({col})"
        st.subheader(sub_title, divider="rainbow", anchor=_title_anchor(sub_title))
        c1, c2, _ = st.columns([2, 2, 1])
        nlags = int(c1.number_input('最大滞后阶数', value=20, min_value=1, max_value=100, step=1))
        method = c2.selectbox('选择分析方法', ['acf', 'pacf'], index=0)

        if method == 'acf':
            acf_result, conf_int = sm.tsa.acf(df[[col]].copy(), nlags=nlags, alpha=0.05, missing='raise')
        else:
            acf_result, conf_int = sm.tsa.pacf(df[[col]].copy(), nlags=nlags, alpha=0.05)

        lags = list(range(len(acf_result)))
        bar = go.Bar(x=lags, y=acf_result, name='自相关系数')
        upper = go.Scatter(x=lags, y=conf_int[:, 1], mode='lines', name='95%置信区间上界')
        lower = go.Scatter(x=lags, y=conf_int[:, 0], mode='lines', name='95%置信区间下界')
        layout = go.Layout(title=method.upper(), xaxis=dict(title='滞后阶数'), yaxis=dict(title='自相关系数'))
        fig = go.Figure(data=[bar, upper, lower], layout=layout)
        st.plotly_chart(fig, use_container_width=True)

    with right:
        sub_title = f"滞后N阶滑动相关性({col})"
        st.subheader(sub_title, divider="rainbow", anchor=_title_anchor(sub_title))
        c1, c2, c3, c4 = st.columns(4)
        min_periods = int(c1.number_input('最小滑动窗口长度', value=20, min_value=0, step=1))
        window = int(c2.number_input('滑动窗口长度', value=0, step=1, help='0 表示按 expanding 方式滑动'))
        corr_method = c3.selectbox('相关系数计算方法', ['pearson', 'kendall', 'spearman'])
        n = int(c4.number_input('自相关滞后阶数', value=1, min_value=1, step=1))

        # shift(n) aligns each row with the value n periods EARLIER (a true
        # lag). shift(-n) would pair each date with a future value, so the
        # curve at date t would already contain information from t + n.
        lag_col = f"{col}_lag{n}"
        df[lag_col] = df[col].shift(n)
        df = df.dropna(subset=[lag_col])

        show_ts_rolling_corr(df, col, lag_col, min_periods=min_periods, window=window, corr_method=corr_method)
def weekly_performance(weekly_returns):
    """Compute performance statistics of weekly returns using simple interest.

    Returns are summed rather than compounded, and annualisation assumes 52
    weeks per year.

    :param weekly_returns: sequence of weekly returns, e.g.
        [0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01]
    :return: dict with the keys 年化, 夏普, 最大回撤, 卡玛, 周胜率, 年化波动率,
        非零覆盖, 盈亏平衡点, 新高间隔, 新高占比. Every value is 0 when the input is
        empty, constant or all zeros.
    """
    weekly_returns = np.array(weekly_returns, dtype=np.float64)

    if len(weekly_returns) == 0 or np.std(weekly_returns) == 0 or not np.any(weekly_returns):
        return {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "周胜率": 0,
                "年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0, "新高间隔": 0, "新高占比": 0}

    n_weeks = len(weekly_returns)
    annual_returns = np.sum(weekly_returns) / n_weeks * 52
    sharpe_ratio = np.mean(weekly_returns) / np.std(weekly_returns) * np.sqrt(52)
    cum_returns = np.cumsum(weekly_returns)
    # Drawdown: distance of the cumulative return from its running maximum.
    dd = np.maximum.accumulate(cum_returns) - cum_returns
    max_drawdown = np.max(dd)
    # With no drawdown the ratio is undefined; report the upper cap instead.
    kama = annual_returns / max_drawdown if max_drawdown != 0 else 10
    win_pct = len(weekly_returns[weekly_returns >= 0]) / n_weeks
    annual_volatility = np.std(weekly_returns) * np.sqrt(52)
    none_zero_cover = len(weekly_returns[weekly_returns != 0]) / n_weeks

    # Weeks at a new high are exactly those with zero drawdown.
    high_index = np.flatnonzero(dd == 0)
    # Longest gap, in weeks, between two consecutive new highs.
    max_interval = int(np.diff(high_index).max()) if len(high_index) > 1 else 0
    # Share of weeks that closed at a new high.
    high_pct = len(high_index) / len(dd)

    def _clip_round(x, min_val, max_val, digits=4):
        """Clamp ``x`` into [min_val, max_val], then round it."""
        return round(min(max(x, min_val), max_val), digits)

    sta = {
        "年化": round(annual_returns, 4),
        "夏普": _clip_round(sharpe_ratio, -5, 5, 2),
        "最大回撤": round(max_drawdown, 4),
        "卡玛": _clip_round(kama, -10, 10, 2),
        "周胜率": round(win_pct, 4),
        "年化波动率": round(annual_volatility, 4),
        "非零覆盖": round(none_zero_cover, 4),
        "盈亏平衡点": round(cal_break_even_point(weekly_returns), 4),
        "新高间隔": max_interval,
        # Same key as the all-zero result above, so both return paths expose
        # an identical set of keys.
        "新高占比": round(high_pct, 4),
    }
    return sta
import os
import tempfile

import pandas as pd

from czsc.utils.cache import disk_cache


def test_disk_cache():
    """disk_cache must return correct results and reuse the stored entry.

    Everything is written to a throw-away temporary directory, so running the
    test suite never touches (or wipes) the user's real CZSC cache folder.
    """
    with tempfile.TemporaryDirectory() as temp_path:
        calls = []

        @disk_cache(path=temp_path, suffix="pkl", ttl=100)
        def run_func_x(x):
            calls.append(x)
            return x * 2

        @disk_cache(path=temp_path, suffix="xlsx", ttl=100)
        def run_func_y(x):
            df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'x': [x, x, x]})
            return df

        # First call computes the value and writes the cache entry.
        result = run_func_x(5)
        assert result == 10

        # Second call with the same argument must be answered from the cache,
        # i.e. the wrapped function body must not run again.
        result = run_func_x(5)
        assert result == 10
        assert calls == [5]

        # Exactly one cache file exists for the single distinct argument.
        files = os.listdir(os.path.join(temp_path, "run_func_x"))
        assert len(files) == 1

        # DataFrame results stored as xlsx.
        result = run_func_y(5)
        assert isinstance(result, pd.DataFrame)
        files = os.listdir(os.path.join(temp_path, "run_func_y"))
        assert len(files) == 1
        file_xlsx = [x for x in files if x.endswith("xlsx")][0]
        df = pd.read_excel(os.path.join(temp_path, "run_func_y", file_xlsx))
        assert isinstance(df, pd.DataFrame)
        assert df['x'].tolist() == [5, 5, 5]