From ff0eef08fea0837a7cf1ecd65d15d8f67786314a Mon Sep 17 00:00:00 2001
From: zengbin93 <zeng_bin8888@163.com>
Date: Thu, 28 Mar 2024 20:20:22 +0800
Subject: [PATCH] =?UTF-8?q?V0.9.46=20=E6=9B=B4=E6=96=B0=E4=B8=80=E6=89=B9?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81=20(#190)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 0.9.46 start coding

* 0.9.46 PSI 测试开发

* 0.9.46 update

* 0.9.46 新增 optuna 超参分析

* 0.9.46 disk cache 增加默认path

* 0.9.46 新增 rolling_tanh 函数

* 0.9.46 新增CCF因子函数

* 0.9.46 新增最大回撤分析组件

* 0.9.46 新增期货调仓函数
---
 .github/workflows/pythonpackage.yml |  2 +-
 README.md                           |  2 +-
 czsc/__init__.py                    | 14 ++++-
 czsc/connectors/research.py         |  3 +-
 czsc/connectors/tq_connector.py     | 67 +++++++++++++++++++-
 czsc/features/__init__.py           |  4 ++
 czsc/features/tas.py                | 51 ++++++++++++++++
 czsc/features/utils.py              | 94 +++++++++++++++++++++++++++++
 czsc/utils/__init__.py              |  5 +-
 czsc/utils/bar_generator.py         | 24 ++++++++
 czsc/utils/cache.py                 |  2 +-
 czsc/utils/corr.py                  | 14 ++---
 czsc/utils/optuna.py                | 50 +++++++++++++++
 czsc/utils/st_components.py         | 76 +++++++++++++++++++++--
 czsc/utils/stats.py                 | 39 ++++++++++++
 docs/requirements.txt               |  3 +-
 examples/develop/psi.py             | 61 +++++++++++++++++++
 examples/qmt_realtime.py            |  3 -
 examples/signals_dev/fenlei.py      |  7 +++
 requirements.txt                    |  3 +-
 test/test_features.py               | 25 ++++++++
 21 files changed, 522 insertions(+), 27 deletions(-)
 create mode 100644 czsc/features/tas.py
 create mode 100644 czsc/utils/optuna.py
 create mode 100644 examples/develop/psi.py
 create mode 100644 examples/signals_dev/fenlei.py

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 4e706dcac..f6ca04765 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -5,7 +5,7 @@ name: Python package
 
 on:
   push:
-    branches: [ master, V0.9.45 ]
+    branches: [ master, V0.9.46 ]
   pull_request:
     branches: [ master ]
 
diff --git a/README.md b/README.md
index a7644f9a6..97712fce3 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ pip install git@github.com:waditu/czsc.git -U
 
 直接从github指定分支安装最新版：
 ```
-pip install git+https://github.com/waditu/czsc.git@V0.9.41 -U
+pip install git+https://github.com/waditu/czsc.git@V0.9.46 -U
 ```
 
 从`pypi`安装：
diff --git a/czsc/__init__.py b/czsc/__init__.py
index 2a8e00e76..436f9c744 100644
--- a/czsc/__init__.py
+++ b/czsc/__init__.py
@@ -45,6 +45,8 @@
     ExitsOptimize,
 )
 from czsc.utils import (
+    format_standard_kline,
+
     KlineChart,
     WordWriter,
     BarGenerator,
@@ -81,6 +83,7 @@
     holds_performance,
     net_value_stats,
     subtract_fee,
+    top_drawdowns,
 
     home_path,
     DiskCache,
@@ -94,6 +97,9 @@
     DataClient,
     set_url_token,
     get_url_token,
+
+    optuna_study,
+    optuna_good_params,
 )
 
 # 交易日历工具
@@ -121,6 +127,8 @@
     show_stoploss_by_direction,
     show_cointegration,
     show_out_in_compare,
+    show_optuna_study,
+    show_drawdowns,
 )
 
 from czsc.utils.bi_info import (
@@ -144,12 +152,14 @@
     rolling_compare,
     rolling_scale,
     rolling_slope,
+    rolling_tanh,
+    feature_adjust,
 )
 
-__version__ = "0.9.45"
+__version__ = "0.9.46"
 __author__ = "zengbin93"
 __email__ = "zeng_bin8888@163.com"
-__date__ = "20240308"
+__date__ = "20240318"
 
 
 def welcome():
diff --git a/czsc/connectors/research.py b/czsc/connectors/research.py
index 55215c3ab..622170881 100644
--- a/czsc/connectors/research.py
+++ b/czsc/connectors/research.py
@@ -45,6 +45,7 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
     :param kwargs:
     :return:
     """
+    raw_bars = kwargs.get('raw_bars', True)
     kwargs['fq'] = fq
     file = glob.glob(os.path.join(cache_path, "*", f"{symbol}.parquet"))[0]
     freq = czsc.Freq(freq)
@@ -54,5 +55,5 @@ def get_raw_bars(symbol, freq, sdt, edt, fq='前复权', **kwargs):
     kline = kline[(kline['dt'] >= pd.to_datetime(sdt)) & (kline['dt'] <= pd.to_datetime(edt))]
     if kline.empty:
         return []
-    _bars = czsc.resample_bars(kline, freq, raw_bars=True, base_freq='1分钟')
+    _bars = czsc.resample_bars(kline, freq, raw_bars=raw_bars, base_freq='1分钟')
     return _bars
diff --git a/czsc/connectors/tq_connector.py b/czsc/connectors/tq_connector.py
index 11cb3d42d..c415abe01 100644
--- a/czsc/connectors/tq_connector.py
+++ b/czsc/connectors/tq_connector.py
@@ -204,7 +204,14 @@ def is_trade_time(trade_time: Optional[str] = None):
 
 
 def get_daily_backup(api: TqApi, **kwargs):
-    """获取每日账户中需要备份的信息"""
+    """获取每日账户中需要备份的信息
+
+    https://doc.shinnytech.com/tqsdk/latest/reference/tqsdk.objs.html?highlight=account#tqsdk.objs.Order
+    https://doc.shinnytech.com/tqsdk/latest/reference/tqsdk.objs.html?highlight=account#tqsdk.objs.Position
+    https://doc.shinnytech.com/tqsdk/latest/reference/tqsdk.objs.html?highlight=account#tqsdk.objs.Account
+
+    :param api: TqApi, 天勤API实例
+    """
     orders = api.get_order()
     trades = api.get_trade()
     position = api.get_position()
@@ -229,3 +236,61 @@ def get_daily_backup(api: TqApi, **kwargs):
         "account": account,
     }
     return backup
+
+
+def adjust_portfolio(api: TqApi, portfolio, account=None, **kwargs):
+    """Rebalance the account until every contract in ``portfolio`` holds its target volume.
+
+    **Note:** this call blocks until rebalancing completes; read the TargetPosTask source and docs before use.
+
+    :param api: TqApi, the TianQin API instance
+    :param portfolio: dict keyed by symbol, each value being that symbol's settings; sample:
+
+        {
+            "KQ.m@CFFEX.T": {"target_volume": 10, "price": "PASSIVE", "offset_priority": "今昨,开"},
+            "KQ.m@CFFEX.TS": {"target_volume": 0, "price": "ACTIVE", "offset_priority": "今昨,开"},
+            "KQ.m@CFFEX.TF": {"target_volume": 30, "price": "PASSIVE", "offset_priority": "今昨,开"}
+        }
+
+    :param account: TianQin account, passed through to TargetPosTask
+    :param kwargs: ``close_api`` (bool, default True) closes ``api`` once rebalancing has finished
+    :return: the ``api`` object that was passed in
+    """
+    symbol_infos = {}
+    for symbol, conf in portfolio.items():
+        quote = api.get_quote(symbol)
+
+        # Cast once so the value sent to set_target_volume is the one the completion check compares.
+        lots = int(conf.get("target_volume", 0))
+        price = conf.get("price", "PASSIVE")
+        offset_priority = conf.get("offset_priority", "今昨,开")
+
+        # Pitfall: TargetPosTask needs a contract code, so "@" symbols use quote.underlying_symbol.
+        contract = quote.underlying_symbol if "@" in symbol else symbol
+        target_pos = TargetPosTask(api, contract, price=price, offset_priority=offset_priority, account=account)
+        target_pos.set_target_volume(lots)
+        symbol_infos[symbol] = {"quote": quote, "target_pos": target_pos, "lots": lots}
+
+    while True:
+        api.wait_update()
+
+        completed = []
+        for symbol, info in symbol_infos.items():
+            quote = info["quote"]
+            target_pos: TargetPosTask = info["target_pos"]
+            lots = info["lots"]
+            contract = quote.underlying_symbol if "@" in symbol else symbol
+            current = target_pos._pos.pos  # NOTE: reads a private TargetPosTask attribute
+            logger.info(f"调整仓位：{quote.datetime} - {contract}; 目标持仓：{lots}手; 当前持仓：{current}手")
+
+            completed.append(current == lots)
+            if current == lots:
+                logger.info(f"调仓完成：{quote.datetime} - {contract}; {lots}手")
+
+        if all(completed):
+            break
+
+    if kwargs.get("close_api", True):
+        api.close()
+
+    return api
diff --git a/czsc/features/__init__.py b/czsc/features/__init__.py
index 08bc3518e..9e34e3c68 100644
--- a/czsc/features/__init__.py
+++ b/czsc/features/__init__.py
@@ -24,4 +24,8 @@
     VPF002,
     VPF003,
     VPF004,
+)
+
+from .tas import (
+    CCF
 )
\ No newline at end of file
diff --git a/czsc/features/tas.py b/czsc/features/tas.py
new file mode 100644
index 000000000..6ba8a14a2
--- /dev/null
+++ b/czsc/features/tas.py
@@ -0,0 +1,51 @@
+"""
+技术指标因子
+"""
+import inspect
+import hashlib
+import pandas as pd
+
+
+def CCF(df, **kwargs):
+    """Mark the bars on which a CZSC ``Factor`` (Chan-theory / pattern factor) is matched.
+
+    :param df: standard K-line data as a DataFrame
+    :param kwargs: other options
+
+        - czsc_factor: dict, required, the factor configuration; sample:
+
+            {
+                "signals_all": ["日线_D1_表里关系V230101_向上_任意_任意_0"],
+                "signals_any": [],
+                "signals_not": ["日线_D1_涨跌停V230331_涨停_任意_任意_0"],
+            }
+
+        - freq: str, default '日线', K-line frequency
+        - tag: str, label that tells factor variants apart; defaults to a hash of the config and freq
+
+    :return: pd.DataFrame, ``df`` left-merged with an ``F#CCF#{tag}`` column (0 where no signal row exists)
+    """
+    from czsc.objects import Factor
+    from czsc.utils import format_standard_kline
+    from czsc.traders.base import generate_czsc_signals
+    from czsc.traders.sig_parse import get_signals_config
+
+    freq = kwargs.get('freq', '日线')
+    factor_conf = kwargs.get('czsc_factor', None)
+    assert factor_conf is not None and isinstance(factor_conf, dict), "factor 参数必须指定"
+    default_tag = hashlib.sha256(f"{factor_conf}_{freq}".encode()).hexdigest().upper()[:6]
+
+    # inspect.stack()[0] is the current frame record; field 3 is its function name
+    factor_col = f"F#{inspect.stack()[0][3]}#{kwargs.get('tag', default_tag)}"
+
+    factor = Factor.load(factor_conf)
+    signals = [x.signal for x in factor.signals_all + factor.signals_any + factor.signals_not]
+    signals_config = get_signals_config(signals)
+
+    bars = format_standard_kline(df, freq=freq)
+    dfs = generate_czsc_signals(bars, signals_config, init_n=300, sdt=bars[0].dt, df=True)
+    dfs[factor_col] = dfs.apply(factor.is_match, axis=1).astype(int)
+
+    merged = pd.merge(df, dfs[['dt', factor_col]], on='dt', how='left')
+    merged[factor_col] = merged[factor_col].fillna(0)
+    return merged
diff --git a/czsc/features/utils.py b/czsc/features/utils.py
index 7ead08350..1d69826cb 100644
--- a/czsc/features/utils.py
+++ b/czsc/features/utils.py
@@ -187,6 +187,26 @@ def rolling_scale(df: pd.DataFrame, col: str, window=300, min_periods=100, new_c
     return df
 
 
+def rolling_tanh(df: pd.DataFrame, col: str, window=300, min_periods=100, new_col=None, **kwargs):
+    """Rolling tanh transform: each row gets the last element of ``np.tanh(scale(window))``, NaN filled with 0.
+
+    Hyperbolic tangent: https://baike.baidu.com/item/%E5%8F%8C%E6%9B%B2%E6%AD%A3%E5%88%87%E5%87%BD%E6%95%B0/15469414
+
+    :param df: pd.DataFrame, input data; it is sorted by its ``dt`` column before the calculation
+    :param col: str, column to transform
+    :param window: int, rolling window size, default 300
+    :param min_periods: int, minimum number of observations per window, default 100
+    :param new_col: str, output column name; None (the default) means f'{col}_tanh'
+    """
+    target = new_col or f'{col}_tanh'
+    if kwargs.get("copy", False):
+        df = df.copy()
+    df = df.sort_values("dt", ascending=True).reset_index(drop=True)
+    rolled = df[col].rolling(window=window, min_periods=min_periods)
+    df[target] = rolled.apply(lambda w: np.tanh(scale(w))[-1]).fillna(0)    # type: ignore
+    return df
+
+
 def rolling_slope(df: pd.DataFrame, col: str, window=300, min_periods=100, new_col=None, **kwargs):
     """计算序列的滚动斜率
 
@@ -234,3 +254,77 @@ def __lr_slope(x):
 
     df[new_col] = df[new_col].fillna(0)
     return df
+
+
+def feature_adjust_V230101(df: pd.DataFrame, fcol, **kwargs):
+    """Feature adjustment V230101: turn a factor column into a ``weight`` column (position weight).
+
+    Method: scale the factor with ``rolling_scale(method='maxabs_scale')``, then multiply it by the
+    sign of the lagged rolling correlation between the factor and the next-bar return.
+
+    :param df: pd.DataFrame, must contain the dt, symbol and price columns plus the factor column
+    :param fcol: str, name of the factor column
+    :param kwargs: dict; ``window`` (default 1000) and ``min_periods`` (default 200)
+    :return: pd.DataFrame sorted by dt with a new ``weight`` column
+    """
+    roll_kw = {"window": kwargs.get("window", 1000), "min_periods": kwargs.get("min_periods", 200)}
+
+    df = df.copy().sort_values("dt", ascending=True).reset_index(drop=True)
+    # n1b: return of the next bar, price[t+1] / price[t] - 1
+    df['n1b'] = df['price'].shift(-1) / df['price'] - 1
+    # use the correlation from 5 rows earlier; missing values become 0
+    df['corr'] = df[fcol].rolling(**roll_kw).corr(df['n1b']).shift(5).fillna(0)
+
+    df = rolling_scale(df, col=fcol, new_col='weight', method='maxabs_scale', copy=True, **roll_kw)
+    df['weight'] = df['weight'] * np.sign(df['corr'])
+
+    return df.drop(['n1b', 'corr'], axis=1)
+
+
+def feature_adjust_V240323(df: pd.DataFrame, fcol, **kwargs):
+    """Feature adjustment V240323: turn a factor column into a ``weight`` column (position weight).
+
+    Method: normalise the factor with ``rolling_tanh`` (scale + tanh), then multiply it by the
+    sign of the lagged rolling correlation between the factor and the next-bar return.
+
+    :param df: pd.DataFrame, must contain the dt, symbol and price columns plus the factor column
+    :param fcol: str, name of the factor column
+    :param kwargs: dict; ``window`` (default 1000) and ``min_periods`` (default 200)
+    :return: pd.DataFrame sorted by dt with a new ``weight`` column
+    """
+    roll_kw = {"window": kwargs.get("window", 1000), "min_periods": kwargs.get("min_periods", 200)}
+
+    df = df.copy().sort_values("dt", ascending=True).reset_index(drop=True)
+    df['n1b'] = df['price'].shift(-1) / df['price'] - 1
+    # use the correlation from 5 rows earlier (n1b looks one bar ahead); missing values become 0
+    df['corr'] = df[fcol].rolling(**roll_kw).corr(df['n1b']).shift(5).fillna(0)
+
+    df = rolling_tanh(df, col=fcol, new_col='weight', **roll_kw)
+    df['weight'] = df['weight'] * np.sign(df['corr'])
+
+    return df.drop(['n1b', 'corr'], axis=1)
+
+
+def feature_adjust(df: pd.DataFrame, fcol, method, **kwargs):
+    """Feature adjustment entry point: dispatch to the versioned routine named by ``method``.
+
+    :param df: pd.DataFrame, data to adjust
+    :param fcol: str, name of the factor column
+    :param method: str, adjustment method
+
+        - V230101: rolling correlation sign applied to the maxabs_scale-normalised factor
+        - V240323: rolling correlation sign applied to the scale + tanh normalised factor
+
+    :param kwargs: dict, forwarded unchanged to the selected routine
+
+        - window: int, rolling window size
+        - min_periods: int, minimum number of observations
+
+    :return: pd.DataFrame with a new ``weight`` column
+    :raises ValueError: when ``method`` is neither of the names listed above
+    """
+    if method not in ("V230101", "V240323"):
+        raise ValueError(f"Unknown method: {method}")
+    # only the two known names get past the guard
+    adjuster = feature_adjust_V230101 if method == "V230101" else feature_adjust_V240323
+    return adjuster(df, fcol, **kwargs)
diff --git a/czsc/utils/__init__.py b/czsc/utils/__init__.py
index 1425c04e6..91e3fa7ec 100644
--- a/czsc/utils/__init__.py
+++ b/czsc/utils/__init__.py
@@ -10,7 +10,7 @@
 from .echarts_plot import kline_pro, heat_map
 from .word_writer import WordWriter
 from .corr import nmi_matrix, single_linear, cross_sectional_ic
-from .bar_generator import BarGenerator, freq_end_time, resample_bars
+from .bar_generator import BarGenerator, freq_end_time, resample_bars, format_standard_kline
 from .bar_generator import is_trading_time, get_intraday_times, check_freq_and_market
 from .io import dill_dump, dill_load, read_json, save_json
 from .sig import check_pressure_support, check_gap_info, is_bis_down, is_bis_up, get_sub_elements, is_symmetry_zs
@@ -18,12 +18,13 @@
 from .plotly_plot import KlineChart
 from .trade import cal_trade_price, update_nbars, update_bbars, update_tbars, risk_free_returns, resample_to_daily
 from .cross import CrossSectionalPerformance, cross_sectional_ranker
-from .stats import daily_performance, net_value_stats, subtract_fee, weekly_performance, holds_performance
+from .stats import daily_performance, net_value_stats, subtract_fee, weekly_performance, holds_performance, top_drawdowns
 from .signal_analyzer import SignalAnalyzer, SignalPerformance
 from .cache import home_path, get_dir_size, empty_cache_path, DiskCache, disk_cache
 from .index_composition import index_composition
 from .data_client import DataClient, set_url_token, get_url_token
 from .oss import AliyunOSS
+from .optuna import optuna_study, optuna_good_params
 
 
 sorted_freqs = ['Tick', '1分钟', '2分钟', '3分钟', '4分钟', '5分钟', '6分钟', '10分钟', '12分钟',
diff --git a/czsc/utils/bar_generator.py b/czsc/utils/bar_generator.py
index e484529fa..bf6248260 100644
--- a/czsc/utils/bar_generator.py
+++ b/czsc/utils/bar_generator.py
@@ -39,6 +39,30 @@ def get_intraday_times(freq='1分钟', market="A股"):
     return freq_market_times[f"{freq}_{market}"]
 
 
+def format_standard_kline(df: pd.DataFrame, freq: str):
+    """Convert standard K-line rows into a list of CZSC ``RawBar`` objects.
+
+    :param df: standard K-line data as a DataFrame, for example
+
+        ===================  =========  ======  =======  ======  =====  ===========  ===========
+        dt                   symbol       open    close    high    low          vol       amount
+        ===================  =========  ======  =======  ======  =====  ===========  ===========
+        2023-11-17 00:00:00  689009.SH   33.52    33.41   33.69  33.38  1.97575e+06  6.61661e+07
+        2023-11-20 00:00:00  689009.SH   33.4     32.91   33.45  32.25  5.15016e+06  1.68867e+08
+        ===================  =========  ======  =======  ======  =====  ===========  ===========
+
+    :param freq: K-line frequency, converted with ``Freq(freq)``
+    :return: list of RawBar, one per row and in row order
+    """
+    # NOTE(review): RawBar / Freq are presumably imported at the top of this module - confirm
+    return [
+        RawBar(id=idx, symbol=rec['symbol'], dt=rec['dt'], open=rec['open'], close=rec['close'],
+               high=rec['high'], low=rec['low'], vol=rec['vol'], amount=rec['amount'], freq=Freq(freq))
+        # the DataFrame index label of the row becomes the bar id
+        for idx, rec in df.iterrows()
+    ]
+
+
 def check_freq_and_market(time_seq: List[AnyStr], freq: Optional[AnyStr] = None):
     """检查时间序列是否为同一周期，是否为同一市场
 
diff --git a/czsc/utils/cache.py b/czsc/utils/cache.py
index 28244cf92..fe940a811 100644
--- a/czsc/utils/cache.py
+++ b/czsc/utils/cache.py
@@ -159,7 +159,7 @@ def remove(self, k: str, suffix: str = "pkl"):
         Path.unlink(file) if Path.exists(file) else None
 
 
-def disk_cache(path: str, suffix: str = "pkl", ttl: int = -1):
+def disk_cache(path: str = home_path, suffix: str = "pkl", ttl: int = -1):
     """缓存装饰器，支持多种数据格式
 
     :param path: 缓存文件夹路径
diff --git a/czsc/utils/corr.py b/czsc/utils/corr.py
index 8c53212ac..da35337fb 100644
--- a/czsc/utils/corr.py
+++ b/czsc/utils/corr.py
@@ -11,17 +11,10 @@
 """
 import numpy as np
 import pandas as pd
-import seaborn as sns
-import matplotlib.pyplot as plt
-from sklearn import metrics
 from tqdm import tqdm
 from typing import Union
 
 
-plt.rcParams['font.sans-serif'] = ['SimHei']    # 用来正常显示中文标签
-plt.rcParams['axes.unicode_minus'] = False      # 用来正常显示负号
-
-
 def nmi_matrix(df: pd.DataFrame, heatmap=False) -> pd.DataFrame:
     """计算高维标准化互信息并以矩阵形式输出
 
@@ -29,6 +22,13 @@ def nmi_matrix(df: pd.DataFrame, heatmap=False) -> pd.DataFrame:
     :param heatmap: 是否绘制热力图
     :return:
     """
+    import seaborn as sns
+    import matplotlib.pyplot as plt
+    from sklearn import metrics
+
+    plt.rcParams['font.sans-serif'] = ['SimHei']    # 用来正常显示中文标签
+    plt.rcParams['axes.unicode_minus'] = False      # 用来正常显示负号
+
     cols = df.columns.to_list()
 
     m_dict = {}
diff --git a/czsc/utils/optuna.py b/czsc/utils/optuna.py
new file mode 100644
index 000000000..938e24e21
--- /dev/null
+++ b/czsc/utils/optuna.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+"""
+author: zengbin93
+email: zeng_bin8888@163.com
+create_dt: 2024/3/21 13:56
+describe: optuna 工具函数
+"""
+import hashlib
+import optuna
+import inspect
+import pandas as pd
+
+
+def optuna_study(objective, direction="maximize", n_trials=100, **kwargs):
+    """Optimise ``objective`` in a new optuna study and return it; kwargs are passed to ``Study.optimize``."""
+    try:  # partials, callable objects and REPL functions have no retrievable source: hash their repr
+        objective_code = inspect.getsource(objective)
+    except (OSError, TypeError):
+        objective_code = repr(objective)
+    study_name = hashlib.md5(f"{objective_code}_{direction}".encode("utf-8")).hexdigest().upper()[:12]
+    study = optuna.create_study(direction=direction, study_name=study_name)
+    study.optimize(objective, n_trials=n_trials, **kwargs)
+    return study
+
+
+def optuna_good_params(study: optuna.Study, keep=0.2) -> pd.DataFrame:
+    """Return the best-scoring parameter sets among the completed trials of a study.
+
+    :param study: optuna.study.Study
+    :param keep: float, default 0.2; a value below 1 keeps that fraction of the completed
+        trials, a value of 1 or more keeps that many parameter sets
+    :return: pd.DataFrame with columns ``params`` and ``objective``, best first, duplicates removed
+    """
+    assert keep > 0, "keep必须大于0"
+    complete = optuna.trial.TrialState.COMPLETE
+    params = [
+        {"params": trial.params, "objective": trial.value}
+        for trial in study.trials
+        if trial.state == complete and trial.value is not None
+    ]
+
+    n = int(len(params) * keep) if keep < 1 else int(keep)
+    # sort descending only when the study maximises its objective
+    reverse = study.direction == optuna.study.StudyDirection.MAXIMIZE
+    params = sorted(params, key=lambda x: x['objective'], reverse=reverse)
+    # explicit columns keep the frame well-formed when no trial is selected
+    dfp = pd.DataFrame(params[:n], columns=["params", "objective"])
+    # dict cells are unhashable, so de-duplicate on their string form instead
+    dfp = dfp[~dfp["params"].astype(str).duplicated(keep="first")].reset_index(drop=True)
+    return dfp
diff --git a/czsc/utils/st_components.py b/czsc/utils/st_components.py
index 924b75006..ff53a0a4a 100644
--- a/czsc/utils/st_components.py
+++ b/czsc/utils/st_components.py
@@ -1,5 +1,6 @@
 import czsc
 import hashlib
+import optuna
 import numpy as np
 import pandas as pd
 import streamlit as st
@@ -15,7 +16,7 @@ def show_daily_return(df, **kwargs):
     :param df: pd.DataFrame，数据源
     :param kwargs:
 
-        - title: str，标题
+        - sub_title: str，标题
         - stat_hold_days: bool，是否展示持有日绩效指标，默认为 True
         - legend_only_cols: list，仅在图例中展示的列名
         - use_st_table: bool，是否使用 st.table 展示绩效指标，默认为 False
@@ -73,10 +74,9 @@ def _stats(df_, type_='持有日'):
     use_st_table = kwargs.get("use_st_table", False)
 
     with st.container():
-        title = kwargs.get("title", "")
-        if title:
-            st.subheader(title)
-            st.divider()
+        sub_title = kwargs.get("sub_title", "")
+        if sub_title:
+            st.subheader(sub_title, divider="rainbow")
 
         with st.expander("交易日绩效指标", expanded=True):
             if use_st_table:
@@ -641,7 +641,7 @@ def show_ts_self_corr(df, col, **kwargs):
         st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode('utf-8')).hexdigest()[:8])
         c1, c2, c3, c4 = st.columns(4)
         min_periods = int(c1.number_input('最小滑动窗口长度', value=20, min_value=0, step=1))
-        window = int(c2.number_input('滑动窗口长度', value=0, step=1, help='0 表示按 expanding 方式滑动'))
+        window = int(c2.number_input('滑动窗口长度', value=200, step=1))
         corr_method = c3.selectbox('相关系数计算方法', ['pearson', 'kendall', 'spearman'])
         n = int(c4.number_input('自相关滞后阶数', value=1, min_value=1, step=1))
 
@@ -808,3 +808,67 @@ def show_out_in_compare(df, ret_col, mid_dt, **kwargs):
         }
     )
     st.dataframe(df_stats, use_container_width=True)
+
+
+def show_optuna_study(study: optuna.Study, **kwargs):
+    """Render an optuna study in streamlit: contour plot, slice plot and the best-parameter table.
+
+    Visualization docs: https://optuna.readthedocs.io/en/stable/reference/visualization/index.html
+
+    :param study: optuna.Study to display
+    :param kwargs: ``sub_title`` (header text; falsy hides it) and ``keep`` (for ``optuna_good_params``)
+    :return: the ``study`` that was passed in
+    """
+    from czsc.utils.optuna import optuna_good_params
+    sub_title = kwargs.pop("sub_title", "Optuna Study Visualization")
+    if sub_title:
+        st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode("utf-8")).hexdigest().upper()[:6])
+
+    for plot in (optuna.visualization.plot_contour, optuna.visualization.plot_slice):
+        st.plotly_chart(plot(study), use_container_width=True)
+
+    with st.expander("最佳参数列表", expanded=False):
+        st.dataframe(optuna_good_params(study, keep=kwargs.pop("keep", 0.2)), use_container_width=True)
+    return study
+
+
+def show_drawdowns(df, ret_col, **kwargs):
+    """Show a drawdown analysis in streamlit: an optional top-drawdown table plus the drawdown curve.
+
+    :param df: pd.DataFrame with a datetime64[ns] index, or with a ``dt`` column that is converted to one
+    :param ret_col: str, name of the return column; returns are accumulated with cumsum
+    :param kwargs:
+
+        - sub_title: str, optional, section header, default "最大回撤分析"; a falsy value hides it
+        - top: int, optional, default 10, number of drawdowns listed; None hides the table
+
+    """
+    assert isinstance(df, pd.DataFrame), "df 必须是 pd.DataFrame 类型"
+    if not df.index.dtype == 'datetime64[ns]':
+        # build a new frame instead of converting and re-indexing the caller's DataFrame in place
+        df = df.assign(dt=pd.to_datetime(df['dt'])).set_index('dt')
+    assert df.index.dtype == 'datetime64[ns]', "index必须是datetime64[ns]类型, 请先使用 pd.to_datetime 进行转换"
+    df = df[[ret_col]].copy().fillna(0)
+    df.sort_index(inplace=True, ascending=True)
+    df['cum_ret'] = df[ret_col].cumsum()
+    df['cum_max'] = df['cum_ret'].cummax()
+    df['drawdown'] = df['cum_ret'] - df['cum_max']
+
+    sub_title = kwargs.get('sub_title', "最大回撤分析")
+    if sub_title:
+        st.subheader(sub_title, divider="rainbow")
+
+    top = kwargs.get('top', 10)
+    if top is not None:
+        with st.expander(f"TOP{top} 最大回撤详情", expanded=False):
+            dft = czsc.top_drawdowns(df[ret_col].copy(), top=top)
+            dft = dft.style.background_gradient(cmap='RdYlGn_r', subset=['净值回撤'])
+            dft = dft.background_gradient(cmap='RdYlGn', subset=['回撤天数', '恢复天数'])
+            dft = dft.format({'净值回撤': '{:.2%}', '回撤天数': '{:.0f}', '恢复天数': '{:.0f}'})
+            st.dataframe(dft, use_container_width=True)
+
+    drawdown = go.Scatter(x=df.index, y=df["drawdown"], fillcolor="red", fill='tozeroy', mode="lines", name="回测曲线")
+    fig = go.Figure(drawdown)
+    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
+    fig.update_layout(title="", xaxis_title="", yaxis_title="净值回撤", legend_title="回撤曲线")
+    st.plotly_chart(fig, use_container_width=True)
diff --git a/czsc/utils/stats.py b/czsc/utils/stats.py
index fad156a64..e683af713 100644
--- a/czsc/utils/stats.py
+++ b/czsc/utils/stats.py
@@ -363,3 +363,42 @@ def holds_performance(df, **kwargs):
     dfr['cost'] = dfr['change'] * fee / 10000                       # 换手成本
     dfr['edge_post_fee'] = dfr['edge_pre_fee'] - dfr['cost']        # 净收益
     return dfr
+
+
+def top_drawdowns(returns: pd.Series, top: int = 10) -> pd.DataFrame:
+    """Find the largest drawdowns of a return series: peak, valley, recovery date, depth and durations.
+
+    :param returns: pd.Series of per-period returns indexed by date; accumulated with cumsum
+    :param top: int, optional, maximum number of drawdowns returned, default 10
+    :return: pd.DataFrame with columns 回撤开始, 回撤结束, 回撤修复, 净值回撤, 回撤天数, 恢复天数
+    """
+    columns = ["回撤开始", "回撤结束", "回撤修复", "净值回撤"]
+    if len(returns) == 0 or top <= 0:
+        return pd.DataFrame(columns=columns + ["回撤天数", "恢复天数"])
+
+    df_cum = returns.cumsum()
+    underwater = df_cum - df_cum.cummax()
+
+    drawdowns = []
+    for _ in range(top):
+        valley = underwater.idxmin()  # deepest remaining point
+        before, after = underwater[:valley], underwater[valley:]
+        peak = before[before == 0].index[-1]
+        recovered = after[after == 0]
+        # NaT (not np.nan) keeps the recovery column datetime-typed when nothing has recovered
+        recovery = recovered.index[0] if len(recovered) else pd.NaT
+        if pd.isnull(recovery):
+            underwater = underwater.loc[:peak]  # drawdown still open: stop scanning past its peak
+        else:
+            # drop the interior of this drawdown, keeping its peak and recovery points
+            underwater = underwater.drop(underwater[peak:recovery].index[1:-1])
+
+        drawdowns.append((peak, valley, recovery, df_cum.loc[valley] - df_cum.loc[peak]))
+        if len(underwater) == 0 or np.min(underwater) == 0:
+            break
+
+    df_drawdowns = pd.DataFrame(drawdowns, columns=columns)
+    df_drawdowns["回撤修复"] = pd.to_datetime(df_drawdowns["回撤修复"])
+    df_drawdowns['回撤天数'] = (df_drawdowns['回撤结束'] - df_drawdowns['回撤开始']).dt.days
+    df_drawdowns['恢复天数'] = (df_drawdowns['回撤修复'] - df_drawdowns['回撤结束']).dt.days
+    return df_drawdowns
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 88d3f6e1d..5717ce801 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -29,4 +29,5 @@ lightgbm>=4.0.0
 streamlit
 redis
 oss2
-statsmodels
\ No newline at end of file
+statsmodels
+optuna
\ No newline at end of file
diff --git a/examples/develop/psi.py b/examples/develop/psi.py
new file mode 100644
index 000000000..90082c7e2
--- /dev/null
+++ b/examples/develop/psi.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+"""
+author: Napoleon
+create_dt: 2024/03/13 12:14
+describe: psi模型稳定性评估
+
+
+"""
+import numpy as np
+import pandas as pd
+
+
+def psi(df: pd.DataFrame, col, n=10, **kwargs):
+    """Population Stability Index style check: how a column's bin distribution shifts between periods.
+
+    Textbook PSI = sum((actual share - base share) * ln(actual share / base share)).
+    NOTE: this implementation does not apply that formula; its ``PSI`` column is the mean absolute
+    change of each bin's share between consecutive periods.
+
+    Reference: https://zhuanlan.zhihu.com/p/79682292
+
+    :param df: data containing the ``dt`` and ``col`` columns; it is copied, not modified
+    :param col: column to analyse
+    :param n: number of quantile bins (``pd.qcut``)
+    :param kwargs:
+
+        - scale: bool, default False; replace ``col`` by its rolling z-score first
+        - window: rolling window used when scaling, default 2000
+        - min_periods: minimum number of observations used when scaling, default 100
+        - dt_pattern: strftime pattern that groups rows into periods, default '%Y' (yearly);
+          '%Y-%m' groups by month
+
+    :return: pd.DataFrame indexed by bin, one share column per period plus ``PSI``
+    """
+    assert 'dt' in df.columns, '时间列必须为 dt'
+    assert col in df.columns, f'数据中没有 {col} 列'
+    df = df.copy()  # the caller's frame must not pick up the helper columns added below
+    df['dt'] = pd.to_datetime(df['dt'])
+    df['key'] = df['dt'].dt.strftime(kwargs.get('dt_pattern', '%Y'))
+
+    if kwargs.get('scale', False):
+        window = kwargs.get('window', 2000)
+        min_periods = kwargs.get('min_periods', 100)
+        # raw=True hands each window over as an ndarray; keep the z-score of its last element
+        df[col] = df[col].rolling(window=window, min_periods=min_periods).apply(
+            lambda x: ((x - x.mean()) / x.std())[-1], raw=True).fillna(0)
+
+    df['bin'] = pd.qcut(df[col], n)
+    # share of each period's rows that falls into each bin (every period column sums to 1)
+    dfg = df.groupby(['bin', 'key'], observed=False).size().unstack().fillna(0).apply(lambda x: x / x.sum(), axis=0)
+    # mean absolute period-over-period change of each bin's share
+    dfg['PSI'] = dfg.diff(axis=1).abs().mean(axis=1)
+
+    return dfg
+
+
+if __name__ == '__main__':
+    from czsc.connectors import research
+    df = research.get_raw_bars('000001.SH', '日线', '20170101', '20230101', fq='前复权', raw_bars=False)
+    # Demo: 10 quantile bins of the rolling-scaled close column, grouped by year
+    dfs = psi(df, 'close', 10, dt_pattern='%Y', scale=True)
diff --git a/examples/qmt_realtime.py b/examples/qmt_realtime.py
index d3df7a74b..6ba3b160d 100644
--- a/examples/qmt_realtime.py
+++ b/examples/qmt_realtime.py
@@ -49,6 +49,3 @@ def get_index_members(index_code='000852.SH', trade_date='20230131'):
 if __name__ == '__main__':
     manager = qmc.QmtTradeManager(**gjm)
     manager.run()
-
-
-
diff --git a/examples/signals_dev/fenlei.py b/examples/signals_dev/fenlei.py
new file mode 100644
index 000000000..f4fbacb13
--- /dev/null
+++ b/examples/signals_dev/fenlei.py
@@ -0,0 +1,7 @@
+import czsc
+from czsc.connectors import research
+
+bars = research.get_raw_bars("000001.SH", '15分钟', '20101101', '20210101', fq='前复权')
+# Check the tas_macd_first_bs_V221201 signal, configured on the 60-minute frequency, against these bars.
+signals_config = [{'name': "czsc.signals.tas_macd_first_bs_V221201", 'freq': "60分钟"}]
+czsc.check_signals_acc(bars, signals_config=signals_config, height='780px', delta_days=5)  # type: ignore
diff --git a/requirements.txt b/requirements.txt
index 10fbcd5c6..f9e604cf1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,4 +23,5 @@ lightgbm>=4.0.0
 streamlit
 redis
 oss2
-statsmodels
\ No newline at end of file
+statsmodels
+optuna
\ No newline at end of file
diff --git a/test/test_features.py b/test/test_features.py
index e5393e073..c7aed09b6 100644
--- a/test/test_features.py
+++ b/test/test_features.py
@@ -1,4 +1,5 @@
 import pytest
+import numpy as np
 import pandas as pd
 
 
@@ -12,3 +13,27 @@ def test_is_event_feature():
     # 测试非事件类因子
     df2 = pd.DataFrame({'factor': [0, 1, 2, 3, 4, 5]})
     assert is_event_feature(df2, 'factor') is False
+
+
+def test_rolling_tanh():
+    """rolling_tanh adds the expected column and keeps every value inside [-1, 1]."""
+    from czsc.features.utils import rolling_tanh
+
+    # Dummy frame: 500 daily rows of uniform random values
+    df = pd.DataFrame({
+        'dt': pd.date_range(start='1/1/2021', periods=500),
+        'col1': np.random.rand(500)
+    })
+
+    # (expected output column, extra keyword arguments); the first case relies on every default
+    cases = [
+        ('col1_tanh', {}),
+        ('col1_tanh2', {'new_col': 'col1_tanh2'}),
+        ('col1_tanh3', {'new_col': 'col1_tanh3', 'window': 100, 'min_periods': 50}),
+    ]
+
+    # every case runs against the same input frame
+    for expected_col, options in cases:
+        result_df = rolling_tanh(df, 'col1', **options)
+        assert expected_col in result_df.columns
+        assert result_df[expected_col].between(-1, 1).all()