Skip to content

Commit

Permalink
V0.9.42 更新一批代码 (#186)
Browse files Browse the repository at this point in the history
* 0.9.42 start coding

* update

* 0.9.42 disk cache 新增源码变动识别和支持文件类型

* 0.9.42 update

* 0.9.42 新增按年统计表现

* 0.9.42 计算最大新高间隔逻辑修复

* 0.9.42 update

* 0.9.42 update

* 0.9.42 add_macd 优先按计算好的值绘图

* 0.9.42 新增 tas_double_ma_V240208

* 0.9.42 新增 bar_trend_V240209

* 0.9.42 新增 features 模块

* 0.9.42 update

* 0.9.42 update

* 0.9.42 update
  • Loading branch information
zengbin93 authored Feb 17, 2024
1 parent f7ba018 commit 27013d8
Show file tree
Hide file tree
Showing 25 changed files with 907 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.41 ]
branches: [ master, V0.9.42 ]
pull_request:
branches: [ master ]

Expand Down
9 changes: 7 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
# streamlit 量化分析组件
from czsc.utils.st_components import (
show_daily_return,
show_yearly_stats,
show_splited_daily,
show_monthly_return,
show_correlation,
Expand Down Expand Up @@ -126,10 +127,14 @@
find_most_similarity,
)

__version__ = "0.9.41"
from czsc.features.utils import (
is_event_feature,
)

__version__ = "0.9.42"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20240114"
__date__ = "20240121"


def welcome():
Expand Down
71 changes: 61 additions & 10 deletions czsc/connectors/cooperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import czsc
import pandas as pd
from tqdm import tqdm
from loguru import logger
from datetime import datetime
from czsc import RawBar, Freq

Expand Down Expand Up @@ -55,14 +56,27 @@ def get_symbols(name, **kwargs):
:return:
"""
if name == "股票":
data = dc.stock_basic(nobj=1, status=1)
return data['code'].tolist()
df = dc.stock_basic(nobj=1, status=1)
symbols = [f"{row['code']}#STOCK" for _, row in df.iterrows()]
return symbols

if name == "ETF":
raise NotImplementedError
df = dc.etf_basic(v=2, fields='code,name')
dfk = dc.pro_bar(trade_date="2023-11-17", asset="e", v=2)
df = df[df['code'].isin(dfk['code'])].reset_index(drop=True)
symbols = [f"{row['code']}#ETF" for _, row in df.iterrows()]
return symbols

if name == "A股指数":
raise NotImplementedError
# 指数 https://s0cqcxuy3p.feishu.cn/wiki/KuSAweAAhicvsGk9VPTc1ZWKnAd
df = dc.index_basic(v=2, market='SSE,SZSE')
symbols = [f"{row['code']}#INDEX" for _, row in df.iterrows()]
return symbols

if name == "南华指数":
df = dc.index_basic(v=2, market='NH')
symbols = [row['code'] for _, row in df.iterrows()]
return symbols

if name == "期货主力":
kline = dc.future_klines(trade_date="20231101")
Expand All @@ -71,6 +85,28 @@ def get_symbols(name, **kwargs):
raise ValueError(f"{name} 分组无法识别,获取标的列表失败!")


def get_min_future_klines(code, sdt, edt, freq='1m'):
    """Fetch futures K-lines segment by segment and merge them into one frame.

    The [sdt, edt] range is cut at every calendar month end and each segment
    is requested separately from ``dc.future_klines``.

    :param code: contract code, passed through to ``dc.future_klines``
    :param sdt: start date; anything ``pd.to_datetime`` can parse
    :param edt: end date; anything ``pd.to_datetime`` can parse
    :param freq: K-line frequency, passed through to ``dc.future_klines``
    :return: DataFrame with ``code`` renamed to ``symbol``, ``dt`` converted
        to datetime and duplicated (dt, symbol) rows removed (last one kept)
    :raises ValueError: if no segment returned any data
    """
    # Bring both boundaries to the same %Y%m%d text used for the month-end cut
    # points. Sorting mixed formats as plain strings is wrong: "2021-06-15"
    # sorts before "20210131", which scrambles the segments.
    sdt = pd.to_datetime(sdt).strftime('%Y%m%d')
    edt = pd.to_datetime(edt).strftime('%Y%m%d')

    # An offset object is used instead of the '1M' alias, which newer pandas
    # releases deprecate.
    month_ends = pd.date_range(start=sdt, end=edt, freq=pd.offsets.MonthEnd())
    dates = sorted({d.strftime('%Y%m%d') for d in month_ends} | {sdt, edt})

    rows = []
    for sdt_, edt_ in tqdm(zip(dates[:-1], dates[1:]), total=len(dates) - 1):
        df = dc.future_klines(code=code, sdt=sdt_, edt=edt_, freq=freq)
        if df.empty:
            continue
        logger.info(f"{code}获取K线范围:{df['dt'].min()} - {df['dt'].max()}")
        rows.append(df)

    if not rows:
        # pd.concat([]) would raise a ValueError with an unrelated message;
        # keep the exception type but say what actually went wrong.
        raise ValueError(f"{code} 在 {sdt} - {edt} 区间内没有获取到K线数据")

    df = pd.concat(rows, ignore_index=True)
    df.rename(columns={'code': 'symbol'}, inplace=True)
    df['dt'] = pd.to_datetime(df['dt'])

    # Adjacent segments share a boundary date, so the same bar can arrive twice.
    df = df.drop_duplicates(subset=['dt', 'symbol'], keep='last')
    return df


def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
"""获取 CZSC 库定义的标准 RawBar 对象列表
Expand All @@ -85,29 +121,44 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
"""
freq = czsc.Freq(freq)

if symbol.endswith(".SH") or symbol.endswith(".SZ"):
if "SH" in symbol or "SZ" in symbol:
fq_map = {"前复权": "qfq", "后复权": "hfq", "不复权": None}
adj = fq_map.get(fq, None)

code, asset = symbol.split("#")

if freq.value.endswith('分钟'):
df = dc.pro_bar(code=symbol, sdt=sdt, edt=edt, freq='min', adj=adj)
df = dc.pro_bar(code=code, sdt=sdt, edt=edt, freq='min', adj=adj, asset=asset[0].lower(), v=2)
df = df[~df['dt'].str.endswith("09:30:00")].reset_index(drop=True)
else:
df = dc.pro_bar(code=symbol, sdt=sdt, edt=edt, freq='day', adj=adj)
df = dc.pro_bar(code=code, sdt=sdt, edt=edt, freq='day', adj=adj, asset=asset[0].lower(), v=2)

df.rename(columns={'code': 'symbol'}, inplace=True)
df['dt'] = pd.to_datetime(df['dt'])
return czsc.resample_bars(df, target_freq=freq)

if symbol.endswith("9001"):
# https://s0cqcxuy3p.feishu.cn/wiki/WLGQwJLWQiWPCZkPV7Xc3L1engg
if fq == "前复权":
logger.warning("期货主力合约暂时不支持前复权,已自动切换为后复权")

freq_rd = '1m' if freq.value.endswith('分钟') else '1d'
if freq.value.endswith('分钟'):
df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq='1m')
df = get_min_future_klines(code=symbol, sdt=sdt, edt=edt, freq='1m')
else:
df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq='1d')
df.rename(columns={'code': 'symbol'}, inplace=True)
df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq=freq_rd)
df.rename(columns={'code': 'symbol'}, inplace=True)

df['amount'] = df['vol'] * df['close']
df = df[['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol', 'amount']].copy().reset_index(drop=True)
df['dt'] = pd.to_datetime(df['dt'])
return czsc.resample_bars(df, target_freq=freq)

if symbol.endswith(".NH"):
if freq != Freq.D:
raise ValueError("南华指数只支持日线数据")
df = dc.nh_daily(code=symbol, sdt=sdt, edt=edt)

raise ValueError(f"symbol {symbol} 无法识别,获取数据失败!")


Expand Down
27 changes: 27 additions & 0 deletions czsc/features/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""
author: zengbin93
email: [email protected]
create_dt: 2024/02/14 17:48
describe: 时序特征因子库
因子函数编写规范:https://s0cqcxuy3p.feishu.cn/wiki/A9yawT6o1il9SrkUoBNchtXjnBK
"""

from .ret import (
RET001,
RET002,
RET003,
RET004,
RET005,
RET006,
RET007,
RET008,
)

from .vpf import (
VPF001,
VPF002,
VPF003,
VPF004,
)
214 changes: 214 additions & 0 deletions czsc/features/ret.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
"""
用于计算未来收益相关的因子,含有未来信息,不可用于实际交易
通常用作模型训练、因子评价的标准
"""
import numpy as np
import pandas as pd


def RET001(df, **kwargs):
    """Forward return over the next N bars, measured close to close.

    Writes the column ``F#RET001#{tag}`` into ``df`` in place. Rows that have
    no bar N steps ahead get 0.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, number of bars to look ahead, default 5
    :return: None
    """
    n = kwargs.get('n', 5)
    tag = kwargs.get('tag', 'A')

    close = df['close']
    forward_ret = close.shift(-n) / close - 1
    df[f'F#RET001#{tag}'] = forward_ret.fillna(0)


def RET002(df, **kwargs):
    """Forward return over N bars, measured open to open from the next bar.

    For each row the return runs from the open of the following bar to the
    open N bars after that. Writes ``F#RET002#{tag}`` into ``df`` in place;
    rows without enough future bars get 0.

    :param df: standard K-line data as a DataFrame; must contain ``open``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, number of bars to look ahead, default 5
    :return: None
    """
    n = kwargs.get('n', 5)
    tag = kwargs.get('tag', 'A')

    opens = df['open']
    entry_price = opens.shift(-1)
    exit_price = opens.shift(-(n + 1))
    df[f'F#RET002#{tag}'] = (exit_price / entry_price - 1).fillna(0)


def RET003(df, **kwargs):
    """Volatility of per-bar returns over the next N bars.

    Computes the rolling standard deviation of ``close.pct_change()`` over a
    window of N bars and shifts it back by N rows, so each row describes the
    N bars that follow it. Writes ``F#RET003#{tag}`` into ``df`` in place;
    rows without a full future window get 0.

    Intermediate series are kept in local variables rather than scratch
    columns, so an existing ``tmp`` column in ``df`` is left untouched.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, number of future bars in the volatility window, default 5
    :return: None
    """
    tag = kwargs.get('tag', 'A')
    n = kwargs.get('n', 5)

    bar_ret = df['close'].pct_change()
    future_vol = bar_ret.rolling(n).std().shift(-n)
    df[f'F#RET003#{tag}'] = future_vol.fillna(0)


def RET004(df, **kwargs):
    """Ratio of best to worst return within the next N bars.

    For each row the window is the N bars that follow it. Returns inside the
    window are measured against the window's first close. The factor is
    ``max_return / abs(min_return)``.

    Notes:
        1. The result is constrained to [0, 10].
        2. If the worst return in the window is 0 while the best is positive,
           the ratio is infinite and is clipped to 10.
        3. If both are 0, or the future window is incomplete, the result is 0.

    Intermediate series are kept in local variables rather than scratch
    columns, so existing ``max_ret`` / ``min_ret`` columns are left untouched.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, number of future bars in the window, default 5
    :return: None
    """
    tag = kwargs.get('tag', 'A')
    n = kwargs.get('n', 5)

    windows = df['close'].rolling(n)
    max_ret = windows.apply(lambda x: x.max() / x[0] - 1, raw=True)
    min_ret = windows.apply(lambda x: x.min() / x[0] - 1, raw=True)

    # shift(-n) turns the trailing window ending at row i+n into the forward
    # window that starts right after row i.
    ratio = (max_ret / min_ret.abs()).shift(-n)
    df[f'F#RET004#{tag}'] = ratio.fillna(0).clip(0, 10)


def RET005(df, **kwargs):
    """Bar-by-bar win rate over the next N bars.

    The win rate is the share of the following N bars whose close-to-close
    return is strictly positive. Writes ``F#RET005#{tag}`` into ``df`` in
    place; rows without a full future window get 0.

    Intermediate series are kept in local variables rather than scratch
    columns, so an existing ``ret`` column in ``df`` is left untouched.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, rolling window size, default 5
    :return: None
    """
    tag = kwargs.get('tag', 'A')
    n = kwargs.get('n', 5)

    bar_ret = df['close'].pct_change()
    # raw=True hands the lambda a plain ndarray, avoiding a Series per window.
    win_rate = bar_ret.rolling(n).apply(lambda x: np.sum(x > 0) / n, raw=True)
    df[f'F#RET005#{tag}'] = win_rate.shift(-n).fillna(0)


def RET006(df, **kwargs):
    """Bar-by-bar profit/loss ratio over the next N bars.

    Within the following N bars, the factor is the mean of the positive
    close-to-close returns divided by the absolute mean of the negative ones.

    Notes:
        1. The result is constrained to [0, 10].
        2. A window with no winning bar or no losing bar has no defined
           ratio and yields 0, as does an incomplete future window.

    Intermediate series are kept in local variables rather than scratch
    columns, so existing ``ret`` / ``mean_win`` / ``mean_loss`` columns in
    ``df`` are left untouched.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, rolling window size, default 5
    :return: None
    """
    tag = kwargs.get('tag', 'A')
    n = kwargs.get('n', 5)

    def _mean_or_nan(values):
        # Return NaN explicitly for an empty selection instead of relying on
        # a 0/0 division, which emits a RuntimeWarning.
        return values.sum() / values.size if values.size else np.nan

    windows = df['close'].pct_change().rolling(n)
    mean_win = windows.apply(lambda x: _mean_or_nan(x[x > 0]), raw=True)
    mean_loss = windows.apply(lambda x: _mean_or_nan(x[x < 0]), raw=True)

    ratio = (mean_win / mean_loss.abs()).shift(-n)
    df[f'F#RET006#{tag}'] = ratio.fillna(0).clip(0, 10)


def RET007(df, **kwargs):
    """Largest decline within the next N bars.

    For each row the window is the N bars that follow it; the factor is the
    lowest close in that window relative to the window's first close, minus
    one. Writes ``F#RET007#{tag}`` into ``df`` in place; rows without a full
    future window get 0.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, rolling window size, default 5
    :return: None
    """
    n = kwargs.get('n', 5)
    tag = kwargs.get('tag', 'A')

    def _lowest_vs_first(window):
        return np.min(window) / window[0] - 1

    trailing = df['close'].rolling(n).apply(_lowest_vs_first, raw=True)
    df[f'F#RET007#{tag}'] = trailing.shift(-n).fillna(0)


def RET008(df, **kwargs):
    """Largest rise within the next N bars.

    For each row the window is the N bars that follow it; the factor is the
    highest close in that window relative to the window's first close, minus
    one. Writes ``F#RET008#{tag}`` into ``df`` in place; rows without a full
    future window get 0.

    :param df: standard K-line data as a DataFrame; must contain ``close``
    :param kwargs: other parameters
        - tag: str, suffix used in the factor column name, default 'A'
        - n: int, rolling window size, default 5
    :return: None
    """
    n = kwargs.get('n', 5)
    tag = kwargs.get('tag', 'A')

    def _highest_vs_first(window):
        return np.max(window) / window[0] - 1

    trailing = df['close'].rolling(n).apply(_highest_vs_first, raw=True)
    df[f'F#RET008#{tag}'] = trailing.shift(-n).fillna(0)


def test_ret_functions():
    """Smoke test: every RET factor adds its ``F#RETxxx#A`` column.

    Pulls daily bars for 000001.SZ through the cooperation connector, so it
    needs that data service to be reachable.
    """
    from czsc.connectors import cooperation as coo

    df = coo.dc.pro_bar(code="000001.SZ", freq="day", sdt="2020-01-01", edt="2021-01-31")
    df['dt'] = pd.to_datetime(df['dt'])
    df.rename(columns={'code': 'symbol'}, inplace=True)

    # Applied in the same order as the factor numbering; each call mutates df.
    factors = [
        ('F#RET001#A', RET001),
        ('F#RET002#A', RET002),
        ('F#RET003#A', RET003),
        ('F#RET004#A', RET004),
        ('F#RET005#A', RET005),
        ('F#RET006#A', RET006),
        ('F#RET007#A', RET007),
        ('F#RET008#A', RET008),
    ]
    for column, factor in factors:
        factor(df, tag='A')
        assert column in df.columns
Loading

0 comments on commit 27013d8

Please sign in to comment.