Skip to content

Commit

Permalink
0.9.57 新增 cross_sectional_strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Aug 2, 2024
1 parent 8e60f00 commit 18feec7
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 2 deletions.
3 changes: 2 additions & 1 deletion czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@
)

from czsc.eda import (
remove_beta_effects, vwap, twap
remove_beta_effects, vwap, twap,
cross_sectional_strategy,
)


Expand Down
47 changes: 46 additions & 1 deletion czsc/eda.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
create_dt: 2023/2/7 13:17
describe: 用于探索性分析的函数
"""
import loguru
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge, LinearRegression, Lasso


def vwap(price: np.array, volume: np.array, **kwargs) -> float:
Expand Down Expand Up @@ -40,7 +42,6 @@ def remove_beta_effects(df, **kwargs):
:return: DataFrame
"""
from sklearn.linear_model import Ridge, LinearRegression, Lasso

linear_model = kwargs.get("linear_model", "ridge")
linear = {
Expand All @@ -53,11 +54,15 @@ def remove_beta_effects(df, **kwargs):

factor = kwargs.get("factor")
betas = kwargs.get("betas")
logger = kwargs.get("logger", loguru.logger)

assert factor is not None and betas is not None, "factor 和 betas 参数必须指定"
assert isinstance(betas, list), "betas 参数必须为列表"
assert factor in df.columns, f"数据中不包含因子 {factor}"
assert all([x in df.columns for x in betas]), f"数据中不包含全部 beta {betas}"

logger.info(f"去除 beta 对因子 {factor} 的影响, 使用 {linear_model} 模型, betas: {betas}")

rows = []
for dt, dfg in df.groupby("dt"):
dfg = dfg.copy().dropna(subset=[factor] + betas)
Expand All @@ -72,3 +77,43 @@ def remove_beta_effects(df, **kwargs):

dfr = pd.concat(rows, ignore_index=True)
return dfr


def cross_sectional_strategy(df, factor, **kwargs):
    """Build a long/short portfolio from cross-sectional factor values.

    For every ``dt`` cross-section the rows are ranked by ``factor``; the
    ``long_num`` rows with the largest values get weight ``1 / long_num`` and
    the ``short_num`` rows with the smallest values get weight
    ``-1 / short_num``. All other rows keep weight ``0.0``.

    :param df: pd.DataFrame, data holding the factor; must contain the
        ``dt`` column and the ``factor`` column (``symbol`` is expected by
        convention but is not read here)
    :param factor: str, name of the factor column
    :param kwargs:

        - factor_direction: str, ``positive`` (default) or ``negative``;
          with ``negative`` the factor is sign-flipped before ranking, and the
          returned frame carries the flipped values
        - long_num: int, number of long positions per cross-section, default 5;
          ``0`` disables the long leg
        - short_num: int, number of short positions per cross-section,
          default 5; ``0`` disables the short leg
        - logger: object with a ``warning`` method, defaults to
          ``loguru.logger``

    :return: pd.DataFrame, a copy of ``df`` with an added float ``weight``
        column
    :raises AssertionError: if ``factor`` is not a column of ``df`` or
        ``factor_direction`` is not one of the two accepted values
    """
    factor_direction = kwargs.get("factor_direction", "positive")
    long_num = kwargs.get("long_num", 5)
    short_num = kwargs.get("short_num", 5)

    # Resolve the default lazily so an explicit ``logger=None`` also falls
    # back to loguru instead of failing on the first warning.
    logger = kwargs.get("logger")
    if logger is None:
        logger = loguru.logger

    assert factor in df.columns, f"{factor} 不在 df 中"
    assert factor_direction in ["positive", "negative"], "factor_direction 参数错误"

    df = df.copy()
    if factor_direction == "negative":
        df[factor] = -df[factor]

    # Float from the start: writing fractional weights into an int column is
    # deprecated in recent pandas and would otherwise force a silent upcast.
    df["weight"] = 0.0
    required = long_num + short_num

    for dt, dfg in df.groupby("dt"):
        # sort_values puts NaN last in both directions, so rows without a
        # factor value could otherwise be picked as long or short positions.
        dfg = dfg.dropna(subset=[factor])
        if len(dfg) < required:
            logger.warning(f"{dt} 截面数据量过小,跳过;仅有 {len(dfg)} 条数据,需要 {long_num + short_num} 条数据")
            continue

        # Guard each leg so a one-sided portfolio (num == 0) does not divide
        # by zero.
        if long_num > 0:
            longs = dfg.sort_values(factor, ascending=False).head(long_num)
            df.loc[longs.index, "weight"] = 1 / long_num
        if short_num > 0:
            shorts = dfg.sort_values(factor, ascending=True).head(short_num)
            df.loc[shorts.index, "weight"] = -1 / short_num

    return df
15 changes: 15 additions & 0 deletions czsc/utils/corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def cross_sectional_ic(df, x_col="open", y_col="n1b", method="spearman", **kwarg
"IC绝对值>2%占比": 0,
"累计IC回归R2": 0,
"累计IC回归斜率": 0,
"月胜率": 0,
"月均值": 0,
"年胜率": 0,
"年均值": 0,
}
if df.empty:
return df, res
Expand All @@ -143,4 +147,15 @@ def cross_sectional_ic(df, x_col="open", y_col="n1b", method="spearman", **kwarg

lr_ = single_linear(y=df["ic"].cumsum().to_list())
res.update({"累计IC回归R2": lr_["r2"], "累计IC回归斜率": lr_["slope"]})

monthly_ic = df.groupby(df["dt"].dt.strftime("%Y年%m月"))["ic"].mean().to_dict()
monthly_win_rate = len([1 for x in monthly_ic.values() if np.sign(x) == np.sign(res["IC均值"])]) / len(monthly_ic)
res["月胜率"] = round(monthly_win_rate, 4)
res["月均值"] = round(np.mean(list(monthly_ic.values())), 4)

yearly_ic = df.groupby(df["dt"].dt.strftime("%Y年"))["ic"].mean().to_dict()
yearly_win_rate = len([1 for x in yearly_ic.values() if np.sign(x) == np.sign(res["IC均值"])]) / len(yearly_ic)
res["年胜率"] = round(yearly_win_rate, 4)
res["年均值"] = round(np.mean(list(yearly_ic.values())), 4)

return df, res

0 comments on commit 18feec7

Please sign in to comment.