V0.9.42 更新一批代码 (#186)

* 0.9.42 start coding * update * 0.9.42 disk cache 新增源码变动识别和支持文件类型 * 0.9.42 update * 0.9.42 新增按年统计表现 * 0.9.42 计算最大新高间隔逻辑修复 * 0.9.42 update * 0.9.42 update * 0.9.42 add_macd 优先按计算好的值绘图 * 0.9.42 新增 tas_double_ma_V240208 * 0.9.42 新增 bar_trend_V240209 * 0.9.42 新增 features 模块 * 0.9.42 update * 0.9.42 update * 0.9.42 update
waditu · Feb 17, 2024 · 27013d8 · 27013d8
1 parent f7ba018
commit 27013d8
Show file tree

Hide file tree

Showing 25 changed files with 907 additions and 39 deletions.
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
@@ -5,7 +5,7 @@ name: Python package
 
 on:
   push:
-    branches: [ master, V0.9.41 ]
+    branches: [ master, V0.9.42 ]
   pull_request:
     branches: [ master ]
 

diff --git a/czsc/__init__.py b/czsc/__init__.py
@@ -98,6 +98,7 @@
 # streamlit 量化分析组件
 from czsc.utils.st_components import (
     show_daily_return,
+    show_yearly_stats,
     show_splited_daily,
     show_monthly_return,
     show_correlation,
@@ -126,10 +127,14 @@
     find_most_similarity,
 )
 
-__version__ = "0.9.41"
+from czsc.features.utils import (
+    is_event_feature,
+)
+
+__version__ = "0.9.42"
 __author__ = "zengbin93"
 __email__ = "[email protected]"
-__date__ = "20240114"
+__date__ = "20240121"
 
 
 def welcome():

diff --git a/czsc/connectors/cooperation.py b/czsc/connectors/cooperation.py
@@ -11,6 +11,7 @@
 import czsc
 import pandas as pd
 from tqdm import tqdm
+from loguru import logger
 from datetime import datetime
 from czsc import RawBar, Freq
 
@@ -55,14 +56,27 @@ def get_symbols(name, **kwargs):
     :return:
     """
     if name == "股票":
-        data = dc.stock_basic(nobj=1, status=1)
-        return data['code'].tolist()
+        df = dc.stock_basic(nobj=1, status=1)
+        symbols = [f"{row['code']}#STOCK" for _, row in df.iterrows()]
+        return symbols
 
     if name == "ETF":
-        raise NotImplementedError
+        df = dc.etf_basic(v=2, fields='code,name')
+        dfk = dc.pro_bar(trade_date="2023-11-17", asset="e", v=2)
+        df = df[df['code'].isin(dfk['code'])].reset_index(drop=True)
+        symbols = [f"{row['code']}#ETF" for _, row in df.iterrows()]
+        return symbols
 
     if name == "A股指数":
-        raise NotImplementedError
+        # 指数 https://s0cqcxuy3p.feishu.cn/wiki/KuSAweAAhicvsGk9VPTc1ZWKnAd
+        df = dc.index_basic(v=2, market='SSE,SZSE')
+        symbols = [f"{row['code']}#INDEX" for _, row in df.iterrows()]
+        return symbols
+
+    if name == "南华指数":
+        df = dc.index_basic(v=2, market='NH')
+        symbols = [row['code'] for _, row in df.iterrows()]
+        return symbols
 
     if name == "期货主力":
         kline = dc.future_klines(trade_date="20231101")
@@ -71,6 +85,28 @@ def get_symbols(name, **kwargs):
     raise ValueError(f"{name} 分组无法识别，获取标的列表失败！")
 
 
+def get_min_future_klines(code, sdt, edt, freq='1m'):
+    """Fetch futures K-lines segment by segment and merge the pieces.
+
+    The range [sdt, edt] is cut at every month end, ``dc.future_klines`` is
+    called once per segment, and the results are concatenated.
+
+    :param code: contract code, passed through to ``dc.future_klines``
+    :param sdt: start date; any value ``pd.to_datetime`` can parse
+    :param edt: end date; any value ``pd.to_datetime`` can parse
+    :param freq: K-line frequency, passed through to ``dc.future_klines``
+    :return: DataFrame with ``code`` renamed to ``symbol``, ``dt`` converted to
+        datetime and duplicated (dt, symbol) rows removed, keeping the last one
+    :raises ValueError: if no segment returned any data
+    """
+    # Normalize both boundaries to %Y%m%d before sorting: a lexical sort of mixed
+    # formats (e.g. "2023-01-01" next to "20230131") is not chronological.
+    sdt = pd.to_datetime(sdt).strftime('%Y%m%d')
+    edt = pd.to_datetime(edt).strftime('%Y%m%d')
+    month_ends = [d.strftime('%Y%m%d') for d in pd.date_range(start=sdt, end=edt, freq='1M')]
+    dates = sorted(set(month_ends + [sdt, edt]))
+
+    rows = []
+    for sdt_, edt_ in tqdm(zip(dates[:-1], dates[1:]), total=len(dates) - 1):
+        df = dc.future_klines(code=code, sdt=sdt_, edt=edt_, freq=freq)
+        if df.empty:
+            continue
+        logger.info(f"{code}获取K线范围：{df['dt'].min()} - {df['dt'].max()}")
+        rows.append(df)
+
+    if not rows:
+        # pd.concat([]) would raise the same exception type with an opaque message
+        raise ValueError(f"{code}: no K-line data returned between {sdt} and {edt}")
+
+    df = pd.concat(rows, ignore_index=True)
+    df.rename(columns={'code': 'symbol'}, inplace=True)
+    df['dt'] = pd.to_datetime(df['dt'])
+
+    # Adjacent segments share a boundary date, so overlapping rows are expected
+    df = df.drop_duplicates(subset=['dt', 'symbol'], keep='last')
+    return df
+
+
 def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
     """获取 CZSC 库定义的标准 RawBar 对象列表
 
@@ -85,29 +121,44 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
     """
     freq = czsc.Freq(freq)
 
-    if symbol.endswith(".SH") or symbol.endswith(".SZ"):
+    if "SH" in symbol or "SZ" in symbol:
         fq_map = {"前复权": "qfq", "后复权": "hfq", "不复权": None}
         adj = fq_map.get(fq, None)
+
+        code, asset = symbol.split("#")
+
         if freq.value.endswith('分钟'):
-            df = dc.pro_bar(code=symbol, sdt=sdt, edt=edt, freq='min', adj=adj)
+            df = dc.pro_bar(code=code, sdt=sdt, edt=edt, freq='min', adj=adj, asset=asset[0].lower(), v=2)
             df = df[~df['dt'].str.endswith("09:30:00")].reset_index(drop=True)
         else:
-            df = dc.pro_bar(code=symbol, sdt=sdt, edt=edt, freq='day', adj=adj)
+            df = dc.pro_bar(code=code, sdt=sdt, edt=edt, freq='day', adj=adj, asset=asset[0].lower(), v=2)
+
         df.rename(columns={'code': 'symbol'}, inplace=True)
         df['dt'] = pd.to_datetime(df['dt'])
         return czsc.resample_bars(df, target_freq=freq)
 
     if symbol.endswith("9001"):
+        # https://s0cqcxuy3p.feishu.cn/wiki/WLGQwJLWQiWPCZkPV7Xc3L1engg
+        if fq == "前复权":
+            logger.warning("期货主力合约暂时不支持前复权，已自动切换为后复权")
+
+        freq_rd = '1m' if freq.value.endswith('分钟') else '1d'
         if freq.value.endswith('分钟'):
-            df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq='1m')
+            df = get_min_future_klines(code=symbol, sdt=sdt, edt=edt, freq='1m')
         else:
-            df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq='1d')
-        df.rename(columns={'code': 'symbol'}, inplace=True)
+            df = dc.future_klines(code=symbol, sdt=sdt, edt=edt, freq=freq_rd)
+            df.rename(columns={'code': 'symbol'}, inplace=True)
+
         df['amount'] = df['vol'] * df['close']
         df = df[['symbol', 'dt', 'open', 'close', 'high', 'low', 'vol', 'amount']].copy().reset_index(drop=True)
         df['dt'] = pd.to_datetime(df['dt'])
         return czsc.resample_bars(df, target_freq=freq)
 
+    if symbol.endswith(".NH"):
+        if freq != Freq.D:
+            raise ValueError("南华指数只支持日线数据")
+        df = dc.nh_daily(code=symbol, sdt=sdt, edt=edt)
+
     raise ValueError(f"symbol {symbol} 无法识别，获取数据失败！")
 
 

diff --git a/czsc/features/__init__.py b/czsc/features/__init__.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+"""
+author: zengbin93
+email: [email protected]
+create_dt: 2024/02/14 17:48
+describe: 时序特征因子库
+
+因子函数编写规范：https://s0cqcxuy3p.feishu.cn/wiki/A9yawT6o1il9SrkUoBNchtXjnBK
+"""
+
+from .ret import (
+    RET001,
+    RET002,
+    RET003,
+    RET004,
+    RET005,
+    RET006,
+    RET007,
+    RET008,
+)
+
+from .vpf import (
+    VPF001,
+    VPF002,
+    VPF003,
+    VPF004,
+)
diff --git a/czsc/features/ret.py b/czsc/features/ret.py
@@ -0,0 +1,214 @@
+"""
+用于计算未来收益相关的因子，含有未来信息，不可用于实际交易
+通常用作模型训练、因子评价的标准
+"""
+import numpy as np
+import pandas as pd
+
+
+def RET001(df, **kwargs):
+    """Forward return over the next N bars, based on close prices.
+
+    Row i receives ``close[i + n] / close[i] - 1``; missing values are
+    replaced by 0. The factor contains future information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, number of bars to look ahead, default 5
+
+    :return: None; the result is written to column ``F#RET001#{tag}``
+    """
+    n = kwargs.get('n', 5)
+    tag = kwargs.get('tag', 'A')
+
+    close = df['close']
+    forward_ret = close.shift(-n) / close - 1
+    df[f'F#RET001#{tag}'] = forward_ret.fillna(0)
+
+
+def RET002(df, **kwargs):
+    """Forward return over the next N bars, based on open prices.
+
+    Row i receives ``open[i + n + 1] / open[i + 1] - 1``, i.e. the return is
+    measured from the open of the next bar; missing values are replaced by 0.
+    The factor contains future information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, number of bars to look ahead, default 5
+
+    :return: None; the result is written to column ``F#RET002#{tag}``
+    """
+    n = kwargs.get('n', 5)
+    tag = kwargs.get('tag', 'A')
+
+    opens = df['open']
+    entry_open = opens.shift(-1)
+    exit_open = opens.shift(-n - 1)
+    df[f'F#RET002#{tag}'] = (exit_open / entry_open - 1).fillna(0)
+
+
+def RET003(df, **kwargs):
+    """Volatility of returns over the next N bars.
+
+    Row i receives the rolling standard deviation (pandas default) of the
+    close-to-close returns of bars i+1 .. i+n; missing values are replaced
+    by 0. The factor contains future information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, number of future bars in the volatility window, default 5
+
+    :return: None; the result is written to column ``F#RET003#{tag}``
+    """
+    tag = kwargs.get('tag', 'A')
+    n = kwargs.get('n', 5)
+
+    col = f'F#RET003#{tag}'
+    # Keep the intermediate returns in a local Series instead of a scratch column,
+    # so a caller's own 'tmp' column is never overwritten or dropped.
+    returns = df['close'].pct_change()
+    df[col] = returns.rolling(n).std().shift(-n).fillna(0)
+
+
+def RET004(df, **kwargs):
+    """Ratio of maximum gain to maximum loss over the next N bars.
+
+    For row i the window is the closes of bars i+1 .. i+n. Gain and loss are
+    the window maximum and minimum relative to the first close in the window.
+
+    Notes:
+    1. The ratio is clipped to the range [0, 10].
+    2. When the minimum return in the window is 0 and the maximum is positive,
+       the ratio is infinite and ends up as 10 after clipping.
+    3. Missing values (including 0 / 0) are replaced by 0.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, number of future bars in the window, default 5
+
+    :return: None; the result is written to column ``F#RET004#{tag}``
+    """
+    tag = kwargs.get('tag', 'A')
+    n = kwargs.get('n', 5)
+
+    col = f'F#RET004#{tag}'
+    close = df['close']
+    # First close of each n-bar window ending at the current row
+    window_first = close.shift(n - 1)
+    # Local Series instead of scratch columns: a caller's own 'max_ret' / 'min_ret'
+    # columns are never overwritten or dropped.
+    max_ret = close.rolling(n).max() / window_first - 1
+    min_ret = close.rolling(n).min() / window_first - 1
+    ratio = (max_ret / min_ret.abs()).shift(-n)
+    df[col] = ratio.fillna(0).clip(0, 10)
+
+
+def RET005(df, **kwargs):
+    """Bar-by-bar win rate over the next N bars.
+
+    Row i receives the share of bars i+1 .. i+n whose close-to-close return is
+    positive; missing values are replaced by 0. The factor contains future
+    information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, rolling window size, default 5
+
+    :return: None; the result is written to column ``F#RET005#{tag}``
+    """
+    tag = kwargs.get('tag', 'A')
+    n = kwargs.get('n', 5)
+
+    col = f'F#RET005#{tag}'
+    # Local Series instead of a scratch column: a caller's own 'ret' column
+    # is never overwritten or dropped.
+    ret = df['close'].pct_change()
+    # raw=True hands plain ndarrays to the callback, avoiding a Series per window
+    win_rate = ret.rolling(n).apply(lambda x: np.sum(x > 0) / n, raw=True)
+    df[col] = win_rate.shift(-n).fillna(0)
+
+
+def RET006(df, **kwargs):
+    """Bar-by-bar profit/loss ratio over the next N bars.
+
+    For row i the window is the close-to-close returns of bars i+1 .. i+n. The
+    factor is the mean positive return divided by the absolute mean negative
+    return in that window.
+
+    Notes:
+    1. The ratio is clipped to the range [0, 10].
+    2. A window without any winning or any losing bar has no ratio and gets 0.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, rolling window size, default 5
+
+    :return: None; the result is written to column ``F#RET006#{tag}``
+    """
+    tag = kwargs.get('tag', 'A')
+    n = kwargs.get('n', 5)
+
+    col = f'F#RET006#{tag}'
+    # Local Series instead of scratch columns: a caller's own 'ret' / 'mean_win' /
+    # 'mean_loss' columns are never overwritten or dropped.
+    ret = df['close'].pct_change()
+
+    def _mean_or_nan(values):
+        # An empty selection has no mean; return NaN explicitly instead of
+        # dividing 0 by 0, which gives the same NaN plus a RuntimeWarning.
+        return values.sum() / values.size if values.size else np.nan
+
+    mean_win = ret.rolling(n).apply(lambda x: _mean_or_nan(x[x > 0]), raw=True)
+    mean_loss = ret.rolling(n).apply(lambda x: _mean_or_nan(x[x < 0]), raw=True)
+    ratio = (mean_win / mean_loss.abs()).shift(-n)
+    df[col] = ratio.fillna(0).clip(0, 10)
+
+
+def RET007(df, **kwargs):
+    """Largest decline over the next N bars.
+
+    For row i the window is the closes of bars i+1 .. i+n; the factor is the
+    window minimum relative to the first close in the window, minus 1.
+    Missing values are replaced by 0. The factor contains future information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, rolling window size, default 5
+
+    :return: None; the result is written to column ``F#RET007#{tag}``
+    """
+    n = kwargs.get('n', 5)
+    tag = kwargs.get('tag', 'A')
+
+    def _low_vs_first(window):
+        # Lowest close in the window relative to the window's first close
+        return window.min() / window[0] - 1
+
+    drawdown = df['close'].rolling(n).apply(_low_vs_first, raw=True)
+    df[f'F#RET007#{tag}'] = drawdown.shift(-n).fillna(0)
+
+
+def RET008(df, **kwargs):
+    """Largest advance over the next N bars.
+
+    For row i the window is the closes of bars i+1 .. i+n; the factor is the
+    window maximum relative to the first close in the window, minus 1.
+    Missing values are replaced by 0. The factor contains future information.
+
+    :param df: standard K-line data as a DataFrame; modified in place
+    :param kwargs: other parameters
+
+        - tag: str, suffix of the factor column name, default 'A'
+        - n: int, rolling window size, default 5
+
+    :return: None; the result is written to column ``F#RET008#{tag}``
+    """
+    n = kwargs.get('n', 5)
+    tag = kwargs.get('tag', 'A')
+
+    def _high_vs_first(window):
+        # Highest close in the window relative to the window's first close
+        return window.max() / window[0] - 1
+
+    run_up = df['close'].rolling(n).apply(_high_vs_first, raw=True)
+    df[f'F#RET008#{tag}'] = run_up.shift(-n).fillna(0)
+
+
+def test_ret_functions():
+    """Smoke test: each RET factor adds its ``F#RETxxx#A`` column to the frame."""
+    from czsc.connectors import cooperation as coo
+
+    # Daily bars come from the cooperation data client
+    df = coo.dc.pro_bar(code="000001.SZ", freq="day", sdt="2020-01-01", edt="2021-01-31")
+    df['dt'] = pd.to_datetime(df['dt'])
+    df.rename(columns={'code': 'symbol'}, inplace=True)
+
+    factors = (RET001, RET002, RET003, RET004, RET005, RET006, RET007, RET008)
+    for factor in factors:
+        factor(df, tag='A')
+        # The column name embeds the factor function's own name
+        assert f'F#{factor.__name__}#A' in df.columns