Skip to content

Commit

Permalink
0.9.30 优化部分功能代码
Browse files Browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Oct 5, 2023
1 parent 34b388a commit c788a57
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 49 deletions.
78 changes: 52 additions & 26 deletions czsc/traders/weight_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ def get_ensemble_weight(trader: CzscTrader, method: Union[AnyStr, Callable] = 'm


class WeightBacktest:
"""持仓权重回测"""
"""持仓权重回测
飞书文档:https://s0cqcxuy3p.feishu.cn/wiki/Pf1fw1woQi4iJikbKJmcYToznxb
"""
version = "V231005"

def __init__(self, dfw, digits=2, **kwargs) -> None:
"""持仓权重回测
Expand All @@ -142,7 +146,7 @@ def __init__(self, dfw, digits=2, **kwargs) -> None:
dt 为K线结束时间,必须是连续的交易时间序列,不允许有时间断层
symbol 为合约代码,
weight 为K线结束时间对应的持仓权重,
weight 为K线结束时间对应的持仓权重,品种之间的权重是独立的,不会互相影响
price 为结束时间对应的交易价格,可以是当前K线的收盘价,或者下一根K线的开盘价,或者未来N根K线的TWAP、VWAP等
数据样例如下:
Expand All @@ -169,10 +173,7 @@ def __init__(self, dfw, digits=2, **kwargs) -> None:
self.fee_rate = kwargs.get('fee_rate', 0.0002)
self.dfw['weight'] = self.dfw['weight'].round(digits)
self.symbols = list(self.dfw['symbol'].unique().tolist())
self.res_path = Path(kwargs.get('res_path', "weight_backtest"))
self.res_path.mkdir(exist_ok=True, parents=True)
logger.add(self.res_path.joinpath("weight_backtest.log"), rotation="1 week")
logger.info(f"持仓权重回测参数:digits={digits}, fee_rate={self.fee_rate},res_path={self.res_path},kwargs={kwargs}")
self.results = self.backtest()

def get_symbol_daily(self, symbol):
"""获取某个合约的每日收益率
Expand Down Expand Up @@ -285,39 +286,64 @@ def __add_operate(dt, bar_id, volume, price, operate):

def backtest(self):
    """Backtest every symbol and aggregate the results.

    For each symbol in ``self.symbols`` the daily returns
    (``get_symbol_daily``) and the open/close trade pairs
    (``get_symbol_pairs``) are computed. Nothing is written to disk here.

    :return: dict with one ``{"daily": ..., "pairs": ...}`` entry per symbol
        plus two aggregate entries:

        - ``'品种等权日收益'``: table of daily returns indexed by date, one
          column per symbol and a ``total`` column holding the equal-weight
          mean across symbols
        - ``'绩效评价'``: dict of summary statistics (start/end date, the
          ``daily_performance`` metrics of ``total`` and a selection of the
          ``evaluate_pairs`` metrics over the pooled trade pairs)
    """
    symbols = self.symbols
    res = {}
    for symbol in symbols:
        daily = self.get_symbol_daily(symbol)
        pairs = self.get_symbol_pairs(symbol)
        res[symbol] = {"daily": daily, "pairs": pairs}

    # Equal-weight, after-fee daily returns; dates missing for a symbol count as 0.
    dret = pd.concat([res[symbol]['daily'] for symbol in symbols], ignore_index=True)
    dret = pd.pivot_table(dret, index='date', columns='symbol', values='return').fillna(0)
    dret['total'] = dret[symbols].mean(axis=1)
    res['品种等权日收益'] = dret

    stats = {"开始日期": dret.index.min().strftime("%Y%m%d"), "结束日期": dret.index.max().strftime("%Y%m%d")}
    stats.update(daily_performance(dret['total']))

    # Trade-level statistics over the pooled open/close pairs of all symbols.
    dfp = pd.concat([res[symbol]['pairs'] for symbol in symbols], ignore_index=True)
    pairs_stats = evaluate_pairs(dfp)
    kept_keys = ('单笔收益', '持仓K线数', '交易胜率', '持仓天数')
    stats.update({k: v for k, v in pairs_stats.items() if k in kept_keys})

    res['绩效评价'] = stats
    return res

def report(self, res_path):
    """Write the backtest results held in ``self.results`` to ``res_path``.

    Files created inside ``res_path`` (the directory is created if missing):

    - ``weight_backtest.log``: log file registered as a logger sink
    - ``res.pkl``: pickle of the complete results dict
    - ``daily_return.xlsx``: the ``'品种等权日收益'`` daily-return table
    - ``daily_return.html``: line chart of the cumulative daily returns
    - ``stats.json``: the ``'绩效评价'`` statistics

    :param res_path: output directory
    :return: None
    """
    res_path = Path(res_path)
    res_path.mkdir(exist_ok=True, parents=True)
    # NOTE(review): each call registers one more log sink and none is removed
    # here - confirm that is intended when report() is called repeatedly.
    logger.add(res_path.joinpath("weight_backtest.log"), rotation="1 week")
    logger.info(f"持仓权重回测参数:digits={self.digits}, fee_rate={self.fee_rate},res_path={res_path}")

    res = self.results
    pkl_file = res_path.joinpath("res.pkl")
    pd.to_pickle(res, pkl_file)
    logger.info(f"回测结果已保存到 {pkl_file}")

    # Equal-weight, after-fee daily returns; work on a copy so the stored
    # results are left untouched by the cumsum below.
    dret = res['品种等权日收益'].copy()
    xlsx_file = res_path.joinpath("daily_return.xlsx")
    dret.to_excel(xlsx_file, index=True)
    logger.info(f"品种等权费后日收益率已保存到 {xlsx_file}")

    # Equity curves: cumulative sum of the daily returns. Only the 'total'
    # trace is visible initially; the other traces are set to 'legendonly'.
    dret = dret.cumsum()
    fig = px.line(dret, y=dret.columns.to_list(), title="费后日收益率资金曲线")
    fig.for_each_trace(lambda trace: trace.update(visible=True if trace.name == 'total' else 'legendonly'))
    html_file = res_path.joinpath("daily_return.html")
    fig.write_html(html_file)
    logger.info(f"费后日收益率资金曲线已保存到 {html_file}")

    # Summary statistics already computed and stored under '绩效评价'.
    stats = res['绩效评价'].copy()
    logger.info(f"绩效评价:{stats}")
    json_file = res_path.joinpath("stats.json")
    save_json(stats, json_file)
    logger.info(f"绩效评价已保存到 {json_file}")
14 changes: 5 additions & 9 deletions czsc/utils/cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,11 @@ def cross_sectional_ranker(df, x_cols, y_col, **kwargs):
:return: df, 包含预测分数和排序列
"""
from lightgbm import LGBMRanker
from sklearn.model_selection import TimeSeriesSplit
try:
from lightgbm import LGBMRanker
except:
logger.warning("lightgbm not installed, please install it first! (pip install lightgbm -U)")
return df


assert "symbol" in df.columns, "df must have column 'symbol'"
assert "dt" in df.columns, f"df must have column 'dt'"
assert "dt" in df.columns, "df must have column 'dt'"

if kwargs.get('copy', True):
df = df.copy()
Expand All @@ -213,11 +209,11 @@ def cross_sectional_ranker(df, x_cols, y_col, **kwargs):

for train_index, test_index in tss.split(dfd):
train_dts = dfd[train_index][:, 0]
test_dts= dfd[test_index][:, 0]
test_dts = dfd[test_index][:, 0]

# 拆分训练集和测试集
train, test = df[df['dt'].isin(train_dts)], df[df['dt'].isin(test_dts)]
X_train, X_test, y_train = train[x_cols], test[x_cols], train[y_col]
X_train, X_test, y_train = train[x_cols], test[x_cols], train[y_col]
query_train = train.groupby('dt')['symbol'].count().values

# 训练模型 & 预测
Expand Down
29 changes: 15 additions & 14 deletions czsc/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,18 @@
"""
import numpy as np
import pandas as pd
from typing import List


def cal_break_even_point(seq) -> float:
    """Compute the break-even point of a sequence of returns.

    The returns are sorted ascending and accumulated; the result is the
    number of entries whose running total is still negative, plus one,
    divided by the length of the sequence. Smaller values are better.

    :param seq: sequence of returns, e.g.
        [0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01]
    :return: break-even point; 1.0 when the total return is negative and
        0.0 when ``seq`` is empty
    """
    # ``len`` instead of truthiness: ``seq`` may be a numpy array, whose
    # truth value is ambiguous. Without this guard an empty input divides
    # by zero.
    if len(seq) == 0:
        return 0.0
    if sum(seq) < 0:
        return 1.0
    cum_returns = np.cumsum(sorted(seq))
    return float((np.sum(cum_returns < 0) + 1) / len(cum_returns))


def subtract_fee(df, fee=1):
Expand Down Expand Up @@ -44,7 +55,8 @@ def daily_performance(daily_returns):
daily_returns = np.array(daily_returns, dtype=np.float64)

if len(daily_returns) == 0 or np.std(daily_returns) == 0 or all(x == 0 for x in daily_returns):
return {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0, "年化波动率": 0, "非零覆盖": 0}
return {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0,
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}

annual_returns = np.sum(daily_returns) / len(daily_returns) * 252
sharpe_ratio = np.mean(daily_returns) / np.std(daily_returns) * np.sqrt(252)
Expand All @@ -63,6 +75,7 @@ def daily_performance(daily_returns):
"日胜率": round(win_pct, 4),
"年化波动率": round(annual_volatility, 4),
"非零覆盖": round(none_zero_cover, 4),
"盈亏平衡点": round(cal_break_even_point(daily_returns), 4),
}
return sta

Expand Down Expand Up @@ -133,18 +146,6 @@ def net_value_stats(nv: pd.DataFrame, exclude_zero: bool = False, sub_cost=True)
return res


def cal_break_even_point(seq: List[float]) -> float:
    """Compute the break-even point of a sequence of per-trade returns.

    The returns are sorted ascending and accumulated; the result is the
    number of entries whose running total is still negative, plus one,
    divided by the length of the sequence.

    :param seq: sequence of per-trade returns
    :return: break-even point; 1.0 when the total return is negative and
        0.0 when ``seq`` is empty
    """
    # Guard the empty case explicitly; otherwise the final division is by
    # zero. ``len`` is used because callers may pass a numpy array.
    if len(seq) == 0:
        return 0.0
    if sum(seq) < 0:
        return 1.0
    cum_returns = np.cumsum(sorted(seq))
    return float((np.sum(cum_returns < 0) + 1) / len(cum_returns))


def evaluate_pairs(pairs: pd.DataFrame, trade_dir: str = "多空") -> dict:
"""评估开平交易记录的表现
Expand Down
40 changes: 40 additions & 0 deletions examples/test_offline/test_weight_backtest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import sys
sys.path.insert(0, ".")
sys.path.insert(0, "..")
import czsc
import pandas as pd

assert czsc.WeightBacktest.version == "V231005"


def run_by_weights(
    data_path=r"C:\Users\zengb\Desktop\231005\weight_example.feather",
    res_path=r"C:\Users\zengb\Desktop\231005\weight_example",
):
    """Run a backtest from a sample table of position weights.

    :param data_path: feather file with the weight table handed to
        ``czsc.WeightBacktest``
    :param res_path: directory the report files are written to
    """
    dfw = pd.read_feather(data_path)
    wb = czsc.WeightBacktest(dfw, digits=1, fee_rate=0.0002)

    # ------------------------------------------------------------------------------------
    # Inspect the performance summary
    # ------------------------------------------------------------------------------------
    print(wb.results['绩效评价'])
    # {'开始日期': '20170103',
    #  '结束日期': '20230731',
    #  '年化': 0.093,          # annualized return of the equal-weight portfolio
    #  '夏普': 1.19,           # Sharpe ratio of the equal-weight portfolio
    #  '最大回撤': 0.1397,      # maximum drawdown of the equal-weight portfolio
    #  '卡玛': 0.67,
    #  '日胜率': 0.5228,        # daily win rate of the equal-weight portfolio
    #  '年化波动率': 0.0782,
    #  '非零覆盖': 1.0,
    #  '盈亏平衡点': 0.9782,     # break-even point of the equal-weight portfolio; smaller is
    #                          # better, a normal strategy should fall between 0.85 and 0.98
    #  '单笔收益': 25.6,        # mean return per trade over the pooled trades of all symbols,
    #                          # in BP (0.01%)
    #  '交易胜率': 0.3717,      # win rate over the pooled trades of all symbols
    #  '持仓天数': 3.69,        # mean holding days over the pooled trades of all symbols
    #  '持仓K线数': 971.66}     # mean number of bars held over the pooled trades of all symbols

    # ------------------------------------------------------------------------------------
    # Fetch the backtest result of a single symbol
    # ------------------------------------------------------------------------------------
    symbol_res = wb.results[wb.symbols[0]]
    print(symbol_res)

    wb.report(res_path=res_path)
25 changes: 25 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""
import pytest
import pandas as pd
import numpy as np
from czsc import utils


Expand Down Expand Up @@ -87,3 +88,27 @@ def test_ranker():
assert dfp['rank'].max() == len(symbols)
assert dfp['rank'].min() == 1
assert dfp['rank'].mean() == 2.5


def test_daily_performance():
    """Check daily_performance on degenerate inputs and on a known sample series."""
    from czsc.utils.stats import daily_performance

    zero_stats = {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0, "年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}

    # Degenerate inputs: empty, zero standard deviation, all zeros.
    for degenerate in ([], [1, 1, 1, 1, 1], [0, 0, 0, 0, 0]):
        assert daily_performance(degenerate) == zero_stats

    # A regular series must give the same metrics as an ndarray and as a list.
    sample = [0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01]
    expected = {'年化': 2.016, '夏普': 8.27, '最大回撤': 0.02, '卡玛': 100.8, '日胜率': 0.7, '年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7}

    assert daily_performance(np.array(sample)) == expected
    assert daily_performance(sample) == expected

0 comments on commit c788a57

Please sign in to comment.