diff --git a/examples/develop/index_composition.py b/examples/develop/index_composition.py new file mode 100644 index 000000000..19b453bb3 --- /dev/null +++ b/examples/develop/index_composition.py @@ -0,0 +1,115 @@ +import pandas as pd + + +def index_composition_by_base_point(klines, weights=None, base_point=1000): + """设置基点,按收益率加权合成指数 + + 输入: + + 1. 成分标的K线行情 + 2. 每个时刻的成分权重 + 3. 基点 + + 过程: + + 1. 计算成分标的每根K线的收益 + 2. 在每个时刻,按成分权重对标的收益加权计算,得到每个时刻的指数涨跌幅 + 3. 用基点和每个时刻的涨跌幅计算出每个时刻的指数价 + + 输出:指数K线行情 + + :param klines: K线行情,样例如下: + + ========= =================== ======= ======= ======= ======= ========== =========== + symbol dt open close high low vol amount + ========= =================== ======= ======= ======= ======= ========== =========== + 000001.SH 2021-01-04 09:31:00 3473.82 3469.37 3474.33 3468.86 1041014208 13018854400 + 000001.SH 2021-01-04 09:32:00 3470.18 3467.58 3471.95 3467.58 570367232 7721827328 + 000001.SH 2021-01-04 09:33:00 3467.11 3466.98 3468.53 3466.94 660060480 8371049984 + 000001.SH 2021-01-04 09:34:00 3466.83 3463.5 3466.83 3463.4 563931392 7435783168 + 000001.SH 2021-01-04 09:35:00 3463.23 3460.33 3463.75 3460.33 504271904 6500695552 + ========= =================== ======= ======= ======= ======= ========== =========== + + :param weights: 权重调整记录;索引为dt,columns为成分权重,每个时间截面的权重之和可以不为1,样例如下: + + =================== =========== =========== =========== =========== + dt 000001.SH 000016.SH 000300.SH 000905.SH + =================== =========== =========== =========== =========== + 2021-01-04 09:31:00 0.244275 0.236822 0.271415 0.204674 + 2021-01-04 10:10:00 0.250273 0.140398 0.151955 0.294418 + 2021-01-04 10:54:00 0.127531 0.123941 0.199969 0.19682 + =================== =========== =========== =========== =========== + + 注意:权重调整记录的时间截面必须是K线行情的时间截面的子集,且必须包含K线行情的第一根K线的时间截面 + + :param base_point: 基点,默认为1000 + :return: 指数K线行情,样例如下: + + =================== ============ ======== ========== =========== + dt returns price vol amount + =================== ============ ======== ========== =========== + 2021-01-04 09:31:00 0 1000 2195268112 31725149952 + 2021-01-04 09:32:00 -0.000230561 999.769 1187524832 18900989440 + 2021-01-04 09:33:00 -0.000165153 999.604 1330775568 19418287360 + 2021-01-04 09:34:00 -0.00103582 998.569 1133046300 17488768512 + 2021-01-04 09:35:00 -0.00067244 997.897 1025222108 15351070080 + =================== ============ ======== ========== =========== + """ + klines["dt"] = pd.to_datetime(klines["dt"]) + + data = [] + for _, kline in klines.groupby("symbol"): + kline = kline.sort_values("dt", ascending=True).reset_index(drop=True) + kline["returns"] = kline["close"].pct_change() + kline["returns"] = kline["returns"].fillna(0) + data.append(kline) + klines = pd.concat(data, ignore_index=True) + returns = klines.pivot_table(index="dt", columns="symbol", values="returns") + + if weights is None: + weights = returns.copy() + weights[:] = 1 / len(returns.columns.tolist()) + else: + weights['dt'] = pd.to_datetime(weights['dt']) + assert weights['dt'].min() <= klines['dt'].min(), "权重调整记录的首个时刻必须小于等于成分K线首个时刻" + weights.set_index("dt", inplace=True) + weights = weights.reindex(returns.index, method="ffill", copy=True) + + index_ = (returns * weights).sum(axis=1).to_frame("returns") + index_["price"] = base_point * (1 + index_["returns"]).cumprod() + index_['vol'] = klines.groupby("dt")["vol"].sum() + index_['amount'] = klines.groupby("dt")["amount"].sum() + index_ = index_.reset_index() + return index_ + + +def test_index_composition_by_base_point(): + """测试 index_composition_by_base_point""" + import random + from czsc.connectors.research import get_raw_bars + + symbols = ['000001.SH', '000016.SH', '000300.SH', '000905.SH'] + data = [] + for symbol in symbols: + bars = get_raw_bars(symbol=symbol, freq="1分钟", sdt="20210104 09:31", edt="20210107 13:10") + df = pd.DataFrame(bars) + df.drop(columns=["freq", "cache", 'id'], inplace=True) + data.append(df) + klines = pd.concat(data, ignore_index=True) + + index1 = index_composition_by_base_point(klines=klines.copy()) + assert int(index1['price'].iloc[-1]) == 1036 + + weights = pd.DataFrame({"dt": ['2021-01-03 10:10:00', '2021-01-04 10:54:00', '2021-01-05 12:10:00']}) + for symbol in symbols: + weights[symbol] = [random.uniform(0.1, 0.3) for _ in range(3)] + index2 = index_composition_by_base_point(klines=klines.copy(), weights=weights) + assert int(index2['vol'].iloc[-1]) == 200930300 + + try: + weights = pd.DataFrame({"dt": ['2021-01-04 10:10:00', '2021-01-04 10:54:00', '2021-01-05 12:10:00']}) + for symbol in symbols: + weights[symbol] = [random.uniform(0.1, 0.3) for _ in range(3)] + index2 = index_composition_by_base_point(klines=klines.copy(), weights=weights) + except AssertionError: + pass