Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V0.9.33 更新一批代码 #173

Merged
merged 4 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.32 ]
branches: [ master, V0.9.33 ]
pull_request:
branches: [ master ]

Expand Down
6 changes: 4 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
show_sectional_ic,
show_factor_returns,
show_factor_layering,
show_symbol_factor_layering,
)

from czsc.utils.bi_info import (
Expand All @@ -99,12 +100,13 @@

from czsc.utils.features import (
normalize_feature,
normalize_ts_feature,
)

__version__ = "0.9.32"
__version__ = "0.9.33"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231013"
__date__ = "20231018"



Expand Down
1 change: 1 addition & 0 deletions czsc/signals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@
tas_macd_bc_V230803,
tas_macd_bc_V230804,
tas_macd_bc_ubi_V230804,
tas_slope_V231019,
)

from czsc.signals.pos import (
Expand Down
70 changes: 60 additions & 10 deletions czsc/signals/tas.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from czsc.analyze import CZSC
from czsc.objects import Signal, Direction, BI, RawBar, FX, Mark, ZS
from czsc.traders.base import CzscSignals
from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal
from czsc.utils import get_sub_elements, fast_slow_cross, count_last_same, create_single_signal, single_linear
from czsc.utils.sig import cross_zero_axis, cal_cross_num, down_cross_count


Expand Down Expand Up @@ -2789,7 +2789,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
**信号逻辑:**

ATR与收盘价的比值衡量了价格振幅比率的大小,对这个值进行分层。

**信号列表:**

- Signal('日线_D1ATR14_波动V230630_第7层_任意_任意_0')
Expand All @@ -2802,7 +2802,7 @@ def tas_atr_V230630(c: CZSC, **kwargs) -> OrderedDict:
- Signal('日线_D1ATR14_波动V230630_第3层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第2层_任意_任意_0')
- Signal('日线_D1ATR14_波动V230630_第1层_任意_任意_0')

:param c: czsc对象
:param kwargs:

Expand Down Expand Up @@ -2860,15 +2860,15 @@ def tas_rumi_V230704(c: CZSC, **kwargs) -> OrderedDict:
rumi_window = int(kwargs.get('rumi_window', 30))
timeperiod1 = int(kwargs.get('timeperiod1', 3))
timeperiod2 = int(kwargs.get('timeperiod2', 50))

assert rumi_window < timeperiod2, "rumi_window 必须小于 timeperiod2"
freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}F{timeperiod1}S{timeperiod2}R{rumi_window}_BS辅助V230704".split('_')
v1 = '其他'

if len(c.bars_raw) < di + timeperiod2:
return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

key1 = update_ma_cache(c, ma_type='SMA', timeperiod=timeperiod1)
key2 = update_ma_cache(c, ma_type='WMA', timeperiod=timeperiod2)
bars = get_sub_elements(c.bars_raw, di=di, n=timeperiod2)
Expand Down Expand Up @@ -3204,14 +3204,14 @@ def tas_angle_V230802(c: CZSC, **kwargs) -> OrderedDict:

-n:统计笔的数量
-di:取第几笔

:return: 信号识别结果
"""
di = int(kwargs.get('di', 1))
n = int(kwargs.get('n', 9))
th = int(kwargs.get('th', 50))
assert 300 > th > 30, "th 取值范围为 30 ~ 300"

freq = c.freq.value
k1, k2, k3 = f"{freq}_D{di}N{n}T{th}_笔角度V230802".split('_')
v1 = '其他'
Expand Down Expand Up @@ -3309,7 +3309,7 @@ def tas_macd_bc_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if b5.direction == Direction.Down and b5.low < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in b5.fx_b.raw_bars])
od_dif = min([x.cache[cache_key]['dif'] for x in b1.fx_b.raw_bars + b3.fx_b.raw_bars])
Expand Down Expand Up @@ -3358,11 +3358,61 @@ def tas_macd_bc_ubi_V230804(c: CZSC, **kwargs) -> OrderedDict:
od_dif = max([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 < b5_dif < od_dif:
v1 = '空头'

if ubi['direction'] == Direction.Down and ubi['low'] < (dd + (gg - dd) / 4):
b5_dif = min([x.cache[cache_key]['dif'] for x in ubi['raw_bars'][-5:]])
od_dif = min([x.cache[cache_key]['dif'] for x in b2.fx_b.raw_bars + b4.fx_b.raw_bars])
if 0 > b5_dif > od_dif:
v1 = '多头'

return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)


def tas_slope_V231019(c: CZSC, **kwargs) -> OrderedDict:
    """Long/short signal from the relative level of the DIF trend-line slope.

    Parameter template: "{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019"

    **Signal logic:**

    1. For every bar with index ``i >= n`` a slope is computed from the MACD
       DIF values of the ``n`` bars preceding it (``bars_raw[i - n: i]``) and
       cached on the bar.
    2. The slopes of a window of ``n * 10`` bars selected by ``di`` are
       collected (bars without a cached slope count as 0).
    3. The latest slope is min-max scaled inside that window:
       ``q = (last - min) / (max - min)``. Note this is a relative position in
       the window's range, not a rank-based quantile.
    4. ``q > th / 100`` gives '看多'; ``q < 1 - th / 100`` gives '看空';
       anything else (including a window whose slopes are all equal) gives
       '其他'.

    **Signal list:**

    - Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看多_任意_任意_0')
    - Signal('60分钟_D1DIF10斜率T80_BS辅助V231019_看空_任意_任意_0')

    :param c: CZSC object
    :param kwargs: parameter dict

        - di: which bar (counted from the end) the window ends on, default 1
        - n: number of bars used for each slope, default 10
        - th: threshold in percent, must satisfy 50 < th < 100, default 80

    :return: signal result
    """
    di = int(kwargs.get('di', 1))
    n = int(kwargs.get('n', 10))
    th = int(kwargs.get('th', 80))
    assert 50 < th < 100, 'th 参数取值范围为 50 ~ 100'

    freq = c.freq.value
    cache_key = update_macd_cache(c, fastperiod=12, slowperiod=26, signalperiod=9)
    k1, k2, k3 = f"{freq}_D{di}DIF{n}斜率T{th}_BS辅助V231019".split('_')
    v1 = '其他'
    if len(c.bars_raw) < 50:
        return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

    # Slopes are cached per bar so that repeated calls only compute new bars.
    cache_slope_key = f"tas_slope_V231019_{di}_{n}"
    for i, bar in enumerate(c.bars_raw):
        if i < n or cache_slope_key in bar.cache:
            continue
        # NOTE(review): the window is the n bars *before* bar i (bar i itself
        # is excluded) -- confirm this lag is intended.
        dif = [x.cache[cache_key]['dif'] for x in c.bars_raw[i - n: i]]
        # presumably single_linear fits a straight line to the sequence;
        # only its 'slope' entry is used here.
        bar.cache[cache_slope_key] = single_linear(dif)['slope']

    bars = get_sub_elements(c.bars_raw, di=di, n=n * 10)
    dif_slope = [x.cache.get(cache_slope_key, 0) for x in bars]
    if not dif_slope:
        return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

    lowest, highest = min(dif_slope), max(dif_slope)
    if highest == lowest:
        # All slopes in the window are identical: the relative position is
        # undefined, so avoid a ZeroDivisionError and report the neutral value.
        return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)

    q = (dif_slope[-1] - lowest) / (highest - lowest)
    if q > th / 100:
        v1 = '看多'
    elif q < 1 - th / 100:
        v1 = '看空'
    return create_single_signal(k1=k1, k2=k2, k3=k3, v1=v1)
65 changes: 61 additions & 4 deletions czsc/utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
create_dt: 2023/10/06 15:01
describe: 因子(特征)处理
"""
import pandas as pd
from loguru import logger
from sklearn.preprocessing import scale

Expand All @@ -19,10 +20,66 @@ def normalize_feature(df, x_col, **kwargs):
- q: float,缩尾比例, 默认 0.05
"""
df = df.copy()
if df[x_col].isna().sum() > 0:
logger.warning(f"因子列 {x_col} 存在缺失值,已自动剔除,这有可能导致后续分析结果不准确")
df = df.dropna(subset=[x_col])

assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
q = kwargs.get("q", 0.05) # 缩尾比例
df[x_col] = df.groupby("dt")[x_col].transform(lambda x: scale(x.clip(lower=x.quantile(q), upper=x.quantile(1 - q))))
return df


def normalize_ts_feature(df, x_col, n=10, **kwargs):
    """Normalize a time-series factor and split it into layers over a moving window.

    For each row, the z-score and the quantile bucket of the current value are
    computed from an expanding or rolling window that ends at that row. Rows in
    the warm-up period (where the window is shorter than ``min_periods``) are
    filled from statistics computed over those warm-up rows as a whole.

    The columns are added to ``df`` in place and ``df`` is also returned.
    Columns that already exist (``{x_col}_norm`` / ``{x_col}_qcut``) are not
    recomputed.

    :param df: factor data; must contain the ``x_col`` column without missing values
    :param x_col: name of the factor column
    :param n: number of layers, default 10
    :param kwargs:

        - method: windowing method, ``expanding`` or ``rolling``, default ``expanding``
        - min_periods: minimum sample count of the window (also the window
          length for ``rolling``), default 300

    :return: df with the columns ``{x_col}_norm``, ``{x_col}_qcut`` and ``{x_col}分层`` added
    :raises AssertionError: if the factor has missing values or no more than ``n`` distinct values
    :raises ValueError: if ``method`` is neither ``expanding`` nor ``rolling``
    """
    assert df[x_col].nunique() > n, "因子值的取值数量必须大于分层数量"
    assert df[x_col].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[x_col].isna().sum())
    method = kwargs.get("method", "expanding")
    min_periods = kwargs.get("min_periods", 300)

    norm_col = f"{x_col}_norm"
    qcut_col = f"{x_col}_qcut"

    def _windowed(series):
        """Build the expanding / rolling window object selected by ``method``."""
        if method == "expanding":
            return series.expanding(min_periods=min_periods)
        if method == "rolling":
            return series.rolling(min_periods=min_periods, window=min_periods)
        raise ValueError("method 必须为 expanding 或 rolling")

    if norm_col not in df.columns:
        df[norm_col] = _windowed(df[x_col]).apply(
            lambda x: (x.iloc[-1] - x.mean()) / x.std(), raw=False)

        # Fill rows without a windowed value with a z-score computed over
        # exactly those rows. The parentheses matter: the previous expression
        # `na_x - na_x.mean() / na_x.std()` subtracted mean/std instead of
        # standardizing.
        missing = df[norm_col].isna()
        if missing.any():
            na_x = df.loc[missing, x_col].values
            df.loc[missing, norm_col] = (na_x - na_x.mean()) / na_x.std()

    if qcut_col not in df.columns:
        df[qcut_col] = _windowed(df[x_col]).apply(
            lambda x: pd.qcut(x, q=n, labels=False, duplicates='drop', retbins=False).values[-1], raw=False)

        # Fill rows without a windowed bucket by bucketing those rows together;
        # skipped when nothing is missing so qcut never sees an empty array.
        missing = df[qcut_col].isna()
        if missing.any():
            na_x = df.loc[missing, x_col].values
            df.loc[missing, qcut_col] = pd.qcut(na_x, q=n, labels=False, duplicates='drop', retbins=False)

    if df[qcut_col].isna().sum() > 0:
        logger.warning(f"因子 {x_col} 分层存在 {df[f'{x_col}_qcut'].isna().sum()} 个缺失值,已使用前值填充")
        df[qcut_col] = df[qcut_col].ffill()

    # Bucket index is 0-based; layer labels are 1-based and zero-padded.
    df[f'{x_col}分层'] = df[qcut_col].apply(lambda x: f'第{str(int(x+1)).zfill(2)}层')

    return df
33 changes: 21 additions & 12 deletions czsc/utils/st_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,16 @@ def _stats(df_, type_='持有日'):

df = df.cumsum()
fig = px.line(df, y=df.columns.to_list(), title="日收益累计曲线")
fig.update_xaxes(title='')

# 添加每年的开始第一个日期的竖线
for year in range(df.index.year.min(), df.index.year.max() + 1):
first_date = df[df.index.year == year].index.min()
fig.add_vline(x=first_date, line_dash='dash', line_color='red')

for col in kwargs.get("legend_only_cols", []):
fig.update_traces(visible="legendonly", selector=dict(name=col))

st.plotly_chart(fig, use_container_width=True)


Expand Down Expand Up @@ -183,15 +191,16 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一
df[y_col] = df[y_col] / 10000

if df[x_col].nunique() > n:
df[f'{x_col}分层'] = pd.qcut(df[x_col], q=n, labels=False, duplicates='drop')
df[f'{x_col}分层'] = df[f'{x_col}分层'].apply(lambda x: f'第{str(x+1).zfill(2)}层')
else:
# 如果因子值的取值数量小于分层数量,直接使用因子独立值排序作为分层
x_rank = sorted(df[x_col].unique())
x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
st.success(f"因子值分层对应关系:{x_rank}")
df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])
if f'{x_col}分层' not in df.columns:
# 如果因子分层列不存在,先计算因子分层
if df[x_col].nunique() > n:
czsc.normlize_ts_feature(df, x_col, n=n)
else:
# 如果因子值的取值数量小于分层数量,直接使用因子独立值排序作为分层
x_rank = sorted(df[x_col].unique())
x_rank = {x_rank[i]: f'第{str(i+1).zfill(2)}层' for i in range(len(x_rank))}
st.success(f"因子值分层对应关系:{x_rank}")
df[f'{x_col}分层'] = df[x_col].apply(lambda x: x_rank[x])

for i in range(n):
df[f'第{str(i+1).zfill(2)}层'] = np.where(df[f'{x_col}分层'] == f'第{str(i+1).zfill(2)}层', df[y_col], 0)
Expand All @@ -210,7 +219,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
long = col1.multiselect("多头组合", layering_cols, default=["第02层"], key="symbol_factor_long")
short = col2.multiselect("空头组合", layering_cols, default=["第01层"], key="symbol_factor_short")
dfr = mrr.copy()
dfr['多头'] = dfr[long].mean(axis=1)
dfr['空头'] = -dfr[short].mean(axis=1)
dfr['多空'] = (dfr['多头'] + dfr['空头']) / 2
dfr['多头'] = dfr[long].sum(axis=1)
dfr['空头'] = -dfr[short].sum(axis=1)
dfr['多空'] = dfr['多头'] + dfr['空头']
show_daily_return(dfr[['多头', '空头', '多空']])
Loading