diff --git a/README.md b/README.md
index ed9cc3334..ab80214a8 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 :warning: **Use at own risk** :warning:
 
-v5.9.8
+v5.9.10
 
 ## Overview
diff --git a/backtest.py b/backtest.py
index 9e0a701a0..f91978873 100644
--- a/backtest.py
+++ b/backtest.py
@@ -238,7 +238,7 @@ async def main():
             print(f"{k: <{max(map(len, keys)) + 2}} {config[k]}")
         print()
         if config["ohlcv"]:
-            data = load_hlc_cache(
+            data = await load_hlc_cache(
                 symbol,
                 config["inverse"],
                 config["start_date"],
diff --git a/configs/optimize/default.hjson b/configs/optimize/default.hjson
index aac1b0347..4b1cfeef0 100644
--- a/configs/optimize/default.hjson
+++ b/configs/optimize/default.hjson
@@ -48,6 +48,10 @@
   maximum_eqbal_ratio_std_long: 0.025
   maximum_eqbal_ratio_std_short: 0.025
 
+  # score -= max(exposure_ratios_mean, eqbal_ratio_std)
+  maximum_exposure_ratios_mean_long: 0.1
+  maximum_exposure_ratios_mean_short: 0.1
+
   # clip results: compute score on top performers only
   # clip_threshold=0.1 means drop 10% worst performers; clip_threshold=0.0 means include all
   clip_threshold: 0.5
@@ -74,17 +78,15 @@
     "DEFIUSDT", "YFIUSDT", "BALUSDT", "CRVUSDT", "TRBUSDT",
     "RUNEUSDT", "SUSHIUSDT", "EGLDUSDT", "SOLUSDT", "ICXUSDT",
     "STORJUSDT", "BLZUSDT", "UNIUSDT", "AVAXUSDT", "FTMUSDT",
-    "HNTUSDT", "ENJUSDT", "FLMUSDT", "TOMOUSDT", "RENUSDT",
-    "KSMUSDT", "NEARUSDT", "AAVEUSDT", "FILUSDT", "RSRUSDT",
-    "LRCUSDT", "MATICUSDT", "OCEANUSDT", "BELUSDT", "CTKUSDT",
-    "AXSUSDT", "ALPHAUSDT", "ZENUSDT", "SKLUSDT", "GRTUSDT",
-    "1INCHUSDT", "CHZUSDT", "SANDUSDT", "ANKRUSDT", "LITUSDT",
-    "UNFIUSDT", "REEFUSDT", "RVNUSDT", "SFPUSDT", "XEMUSDT",
-    "COTIUSDT", "CHRUSDT", "MANAUSDT", "ALICEUSDT", "HBARUSDT",
-    "ONEUSDT", "LINAUSDT", "STMXUSDT", "DENTUSDT", "CELRUSDT",
-    "HOTUSDT", "MTLUSDT", "OGNUSDT", "NKNUSDT", "DGBUSDT",
-
-
+    "ENJUSDT", "FLMUSDT", "TOMOUSDT", "RENUSDT", "KSMUSDT",
+    "NEARUSDT", "AAVEUSDT", "FILUSDT", "RSRUSDT", "LRCUSDT",
+    "MATICUSDT", "OCEANUSDT", "BELUSDT", "CTKUSDT", "AXSUSDT",
+    "ALPHAUSDT", "ZENUSDT", "SKLUSDT", "GRTUSDT", "1INCHUSDT",
+    "CHZUSDT", "SANDUSDT", "ANKRUSDT", "LITUSDT", "UNFIUSDT",
+    "REEFUSDT", "RVNUSDT", "SFPUSDT", "XEMUSDT", "COTIUSDT",
+    "CHRUSDT", "MANAUSDT", "ALICEUSDT", "HBARUSDT", "ONEUSDT",
+    "LINAUSDT", "STMXUSDT", "DENTUSDT", "CELRUSDT", "HOTUSDT",
+    "MTLUSDT", "OGNUSDT", "NKNUSDT", "DGBUSDT",
   ]
 
   bounds_static_grid:
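The two new maximum_exposure_ratios_mean keys follow the same threshold convention as the maximum_eqbal_ratio_std keys above them. A minimal sketch of that convention, assuming it matches the clipping logic visible in calc_scores_old at the bottom of this diff (clip_to_threshold is a hypothetical name, not a passivbot function):

    # Hypothetical helper mirroring calc_scores_old's handling of maximum_* keys:
    # val = max(config[max_key], results[sym][key_side]) if config[max_key] >= 0.0 else 1.0
    def clip_to_threshold(value: float, max_threshold: float) -> float:
        # values better than the cap are clipped to it; a negative cap disables the check
        return max(max_threshold, value) if max_threshold >= 0.0 else 1.0

    print(clip_to_threshold(0.05, 0.1))   # 0.1  (within budget: no extra reward)
    print(clip_to_threshold(0.25, 0.1))   # 0.25 (over budget: worsens the score)
    print(clip_to_threshold(0.25, -1.0))  # 1.0  (negative threshold disables the modifier)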
diff --git a/downloader.py b/downloader.py
index ad1c8e729..a38c1e5eb 100644
--- a/downloader.py
+++ b/downloader.py
@@ -12,6 +12,7 @@
 from urllib.request import urlopen
 from zipfile import ZipFile
 import traceback
+import aiohttp
 
 import numpy as np
 import pandas as pd
@@ -29,7 +30,7 @@
     add_argparse_args,
     utc_ms,
 )
-from pure_funcs import ts_to_date, ts_to_date_utc, date_to_ts, get_dummy_settings
+from pure_funcs import ts_to_date, ts_to_date_utc, date_to_ts2, get_dummy_settings, get_day
 
 
 class Downloader:
@@ -981,11 +982,97 @@ def get_first_ohlcv_ts(symbol: str, spot=False) -> int:
         return 0
 
 
-def get_csv_gz(url: str):
+def findall(string, pattern):
+    """Yields all the positions of
+    the pattern in the string"""
+    i = string.find(pattern)
+    while i != -1:
+        yield i
+        i = string.find(pattern, i + 1)
+
+
+def get_days_in_between(start_day, end_day):
+    date_format = "%Y-%m-%d"
+    start_date = datetime.datetime.strptime(start_day, date_format)
+    end_date = datetime.datetime.strptime(end_day, date_format)
+
+    days_in_between = []
+    current_date = start_date
+    while current_date <= end_date:
+        days_in_between.append(current_date.strftime(date_format))
+        current_date += datetime.timedelta(days=1)
+
+    return days_in_between
+
+
+async def download_ohlcvs_bybit(symbol, start_date, end_date, download_only=False):
+    start_date, end_date = get_day(start_date), get_day(end_date)
+    assert date_to_ts2(end_date) >= date_to_ts2(start_date), "end_date is older than start_date"
+    dirpath = make_get_filepath(f"historical_data/ohlcvs_bybit/{symbol}/")
+    ideal_days = get_days_in_between(start_date, end_date)
+    days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".csv" in filename]
+    days_to_get = [day for day in ideal_days if day not in days_done]
+    dfs = {}
+    if len(days_to_get) > 0:
+        base_url = "https://public.bybit.com/trading/"
+        webpage = await get_bybit_webpage(base_url, symbol)
+        filenames = [cand for day in days_to_get if (cand := f"{symbol}{day}.csv.gz") in webpage]
+        if len(filenames) > 0:
+            n_concurrent_fetches = 10
+            for i in range(0, len(filenames), n_concurrent_fetches):
+                filenames_sublist = filenames[i : i + n_concurrent_fetches]
+                print(
+                    f"fetching trades from {filenames_sublist[0][-17:-7]} to {filenames_sublist[-1][-17:-7]}"
+                )
+                dfs_ = await get_bybit_trades(base_url, symbol, filenames_sublist)
+                dfs_ = {k[-17:-7]: convert_to_ohlcv(v) for k, v in dfs_.items()}
+                dumped = []
+                for day, df in sorted(dfs_.items()):
+                    if day in days_done:
+                        continue
+                    filepath = f"{dirpath}{day}.csv"
+                    df.to_csv(filepath)
+                    dumped.append(day)
+                if not download_only:
+                    dfs.update(dfs_)
+    if not download_only:
+        for day in ideal_days:
+            if day not in days_to_get:
+                dfs[day] = pd.read_csv(f"{dirpath}{day}.csv")
+        if len(dfs) == 0:
+            return pd.DataFrame(columns=["timestamp", "open", "high", "low", "close", "volume"])
+        df = pd.concat(dfs.values()).sort_values("timestamp").reset_index()
+        return df[["timestamp", "open", "high", "low", "close", "volume"]]
+
+
+async def get_bybit_webpage(base_url: str, symbol: str):
+    return urlopen(f"{base_url}{symbol}/").read().decode()
+
+
+async def get_bybit_trades(base_url: str, symbol: str, filenames: list):
+    if len(filenames) == 0:
+        return None
+    async with aiohttp.ClientSession() as session:
+        tasks = {}
+        for url in [f"{base_url}{symbol}/{filename}" for filename in filenames]:
+            tasks[url] = asyncio.ensure_future(get_csv_gz(session, url))
+        responses = {}
+        for url in tasks:
+            responses[url] = await tasks[url]
+    return {k: v.sort_values("timestamp") for k, v in responses.items()}
+
+
+async def fetch_url(session, url):
+    async with session.get(url) as response:
+        content = await response.read()
+        return content
+
+
+async def get_csv_gz(session, url: str):
     # from bybit
     try:
-        resp = urlopen(url)
-        with gzip.open(BytesIO(resp.read())) as f:
+        resp = await fetch_url(session, url)
+        with gzip.open(BytesIO(resp)) as f:
             tdf = pd.read_csv(f)
             return tdf
     except Exception as e:
@@ -1023,8 +1110,8 @@ def download_ohlcvs(
     base_url = "https://data.binance.vision/data/"
     base_url += "spot/" if spot else f"futures/{'cm' if inverse else 'um'}/"
     col_names = ["timestamp", "open", "high", "low", "close", "volume"]
-    start_ts = max(get_first_ohlcv_ts(symbol, spot=spot), date_to_ts(start_date))
-    end_ts = date_to_ts(end_date)
+    start_ts = max(get_first_ohlcv_ts(symbol, spot=spot), date_to_ts2(start_date))
+    end_ts = date_to_ts2(end_date)
     days = [ts_to_date_utc(x)[:10] for x in list(range(start_ts, end_ts, 1000 * 60 * 60 * 24))]
     months = sorted({x[:7] for x in days})
     month_now = ts_to_date(time())[:7]
@@ -1100,12 +1187,12 @@ def count_longest_identical_data(hlc, symbol):
     return longest_consecutive
 
 
-def load_hlc_cache(
+async def load_hlc_cache(
     symbol, inverse, start_date, end_date, base_dir="backtests", spot=False, exchange="binance"
 ):
     cache_fname = (
-        f"{ts_to_date_utc(date_to_ts(start_date))[:10]}_"
-        + f"{ts_to_date_utc(date_to_ts(end_date))[:10]}_ohlcv_cache.npy"
+        f"{ts_to_date_utc(date_to_ts2(start_date))[:10]}_"
+        + f"{ts_to_date_utc(date_to_ts2(end_date))[:10]}_ohlcv_cache.npy"
     )
 
     filepath = make_get_filepath(
@@ -1114,9 +1201,12 @@ def load_hlc_cache(
     if os.path.exists(filepath):
         data = np.load(filepath)
     else:
-        df = download_ohlcvs(symbol, inverse, start_date, end_date, spot)
-        df = df[df.timestamp >= date_to_ts(start_date)]
-        df = df[df.timestamp <= date_to_ts(end_date)]
+        if exchange == "bybit":
+            df = await download_ohlcvs_bybit(symbol, start_date, end_date, download_only=False)
+        else:
+            df = download_ohlcvs(symbol, inverse, start_date, end_date, spot)
+        df = df[df.timestamp >= date_to_ts2(start_date)]
+        df = df[df.timestamp <= date_to_ts2(end_date)]
         data = df[["timestamp", "high", "low", "close"]].values
         np.save(filepath, data)
     try:
@@ -1141,12 +1231,13 @@ async def main():
     args = parser.parse_args()
     config = await prepare_backtest_config(args)
     if config["ohlcv"]:
-        data = load_hlc_cache(
+        data = await load_hlc_cache(
            config["symbol"],
            config["inverse"],
            config["start_date"],
            config["end_date"],
            spot=config["spot"],
+           exchange=config["exchange"],
        )
     else:
         downloader = Downloader(config)
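download_ohlcvs_bybit fetches bybit's daily trade archives in batches of n_concurrent_fetches over one aiohttp session, converts each day to OHLCV, and caches it as a per-day csv, so only missing days are downloaded on later runs. Since load_hlc_cache and the bybit helpers are now coroutines, callers need an event loop; a hypothetical usage sketch (symbol and dates are example values):

    import asyncio

    from downloader import download_ohlcvs_bybit

    async def demo():
        # returns a DataFrame with columns timestamp/open/high/low/close/volume
        df = await download_ohlcvs_bybit("BTCUSDT", "2023-01-01", "2023-01-07")
        print(df.head())

    asyncio.run(demo())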
diff --git a/harmony_search.py b/harmony_search.py
index f4db8f129..e5cb94639 100644
--- a/harmony_search.py
+++ b/harmony_search.py
@@ -2,7 +2,6 @@
 
 os.environ["NOJIT"] = "false"
 
-from downloader import Downloader, load_hlc_cache
 import argparse
 import asyncio
 import json
diff --git a/inspect_opt_results.py b/inspect_opt_results.py
index 1b1139ae7..0b199dbfd 100755
--- a/inspect_opt_results.py
+++ b/inspect_opt_results.py
@@ -51,6 +51,8 @@ def main():
         ("ers", "minimum_eqbal_ratio_mean_of_10_worst_short"),
         ("esl", "maximum_eqbal_ratio_std_long"),
         ("ess", "maximum_eqbal_ratio_std_short"),
+        ("exl", "maximum_exposure_ratios_mean_long"),
+        ("exs", "maximum_exposure_ratios_mean_short"),
         ("ct", "clip_threshold"),
     ]
     for k0, k1 in weights_keys:
diff --git a/optimize.py b/optimize.py
index 54790e6cd..a582e92ed 100644
--- a/optimize.py
+++ b/optimize.py
@@ -267,7 +267,7 @@ async def run_opt(args, config):
             args.symbol = symbol
             tmp_cfg = await prepare_backtest_config(args)
             if config["ohlcv"]:
-                data = load_hlc_cache(
+                data = await load_hlc_cache(
                     symbol,
                     config["inverse"],
                     config["start_date"],
diff --git a/particle_swarm_optimization.py b/particle_swarm_optimization.py
index 2458ead9d..4a24638cc 100644
--- a/particle_swarm_optimization.py
+++ b/particle_swarm_optimization.py
@@ -2,7 +2,6 @@
 
 os.environ["NOJIT"] = "false"
 
-from downloader import Downloader, load_hlc_cache
 import argparse
 import asyncio
 import json
diff --git a/passivbot.py b/passivbot.py
index efe37c501..c3a5186cf 100644
--- a/passivbot.py
+++ b/passivbot.py
@@ -1466,8 +1466,8 @@ def calc_minutes_until_next_orders(self):
             millis_delay_next_entry_short = calc_delay_between_fills_ms_ask(
                 self.position["short"]["price"],
                 self.price,
-                self.xk["delay_between_fills_ms_entry"][0],
-                self.xk["delay_weight_entry"][0],
+                self.xk["delay_between_fills_ms_entry"][1],
+                self.xk["delay_weight_entry"][1],
             )
             millis_since_prev_close_short = (
                 self.server_time - self.last_fills_timestamps["clock_entry_short"]
@@ -1478,8 +1478,8 @@ def calc_minutes_until_next_orders(self):
             millis_delay_next_close_short = calc_delay_between_fills_ms_bid(
                 self.position["short"]["price"],
                 self.price,
-                self.xk["delay_between_fills_ms_close"][0],
-                self.xk["delay_weight_close"][0],
+                self.xk["delay_between_fills_ms_close"][1],
+                self.xk["delay_weight_close"][1],
             )
             millis_since_prev_close_short = (
                 self.server_time - self.last_fills_timestamps["clock_close_short"]
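The passivbot.py hunks above fix the short side of the clock-mode delay calculations to read index 1 of the xk parameter pairs instead of index 0, which holds the long side's values. A toy illustration of the (long, short) packing implied by this fix (all values made up):

    # xk packs per-side parameters as (long, short) pairs; index 0 = long, index 1 = short
    xk = {
        "delay_between_fills_ms_entry": (3600000.0, 7200000.0),
        "delay_weight_entry": (10.0, 5.0),
    }
    LONG, SHORT = 0, 1
    assert xk["delay_between_fills_ms_entry"][SHORT] == 7200000.0
    assert xk["delay_weight_entry"][LONG] == 10.0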
self.xk["delay_between_fills_ms_close"][0], - self.xk["delay_weight_close"][0], + self.xk["delay_between_fills_ms_close"][1], + self.xk["delay_weight_close"][1], ) millis_since_prev_close_short = ( self.server_time - self.last_fills_timestamps["clock_close_short"] diff --git a/procedures.py b/procedures.py index 1ab4d18f8..04065a461 100644 --- a/procedures.py +++ b/procedures.py @@ -25,7 +25,7 @@ ts_to_date_utc, get_dummy_settings, config_pretty_str, - date_to_ts, + date_to_ts2, get_template_live_config, sort_dict_keys, make_compatible, @@ -87,8 +87,8 @@ async def prepare_backtest_config(args) -> dict: config["spot"] = False else: config["spot"] = args.market_type == "spot" - config["start_date"] = ts_to_date_utc(date_to_ts(config["start_date"]))[:10] - config["end_date"] = ts_to_date_utc(date_to_ts(config["end_date"]))[:10] + config["start_date"] = ts_to_date_utc(date_to_ts2(config["start_date"]))[:10] + config["end_date"] = ts_to_date_utc(date_to_ts2(config["end_date"]))[:10] config["exchange"] = load_exchange_key_secret_passphrase(config["user"])[0] config["session_name"] = ( f"{config['start_date'].replace(' ', '').replace(':', '').replace('.', '')}_" @@ -458,8 +458,8 @@ def make_tick_samples(config: dict, sec_span: int = 1): """ for key in ["exchange", "symbol", "spot", "start_date", "end_date"]: assert key in config - start_ts = date_to_ts(config["start_date"]) - end_ts = date_to_ts(config["end_date"]) + start_ts = date_to_ts2(config["start_date"]) + end_ts = date_to_ts2(config["end_date"]) ticks_filepath = os.path.join( "historical_data", config["exchange"], diff --git a/pure_funcs.py b/pure_funcs.py index 1c9cd8e2e..7c19a1274 100644 --- a/pure_funcs.py +++ b/pure_funcs.py @@ -277,6 +277,46 @@ def date_to_ts(d): return int(parser.parse(d).replace(tzinfo=datetime.timezone.utc).timestamp() * 1000) +def date_to_ts2(datetime_string): + try: + date_formats = [ + "%Y", + "%Y-%m", + "%Y-%m-%d", + "%Y-%m-%dT%H", + "%Y-%m-%dT%H:%M", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%SZ", + ] + for format in date_formats: + try: + date_obj = datetime.datetime.strptime(datetime_string, format) + if format == "%Y" or format == "%Y-%m" or format == "%Y-%m-%d": + date_obj = date_obj.replace(hour=0, minute=0, second=0, microsecond=0) + timestamp = date_obj.replace(tzinfo=datetime.timezone.utc).timestamp() + timestamp_ms = int(timestamp * 1000) + return timestamp_ms + except ValueError: + pass + raise ValueError("Invalid datetime format") + except Exception as e: + print("Error:", e) + return None + + +def get_day(date): + # date can be str datetime or float/int timestamp + try: + return ts_to_date_utc(date_to_ts2(date))[:10] + except: + pass + try: + return ts_to_date_utc(date)[:10] + except: + pass + raise Exception(f"failed to get day from {date}") + + def get_utc_now_timestamp() -> int: """ Creates a millisecond based timestamp of UTC now. 
@@ -1399,6 +1439,7 @@ def calc_scores(config: dict, results: dict):
     # [(key_name, higher_is_better)]
     keys = [
         ("adg_weighted_per_exposure", True),
+        ("exposure_ratios_mean", False),
         ("hrs_stuck_max", False),
         ("pa_distance_mean", False),
         ("pa_distance_std", False),
@@ -1463,73 +1504,6 @@
     }
 
 
-def calc_scores_old(config: dict, results: dict):
-    sides = ["long", "short"]
-    keys = [
-        ("adg_realized_per_exposure", True),
-        ("pa_distance_std", False),
-        ("pa_distance_mean", False),
-        ("hrs_stuck_max", False),
-        ("loss_profit_ratio", False),
-        ("eqbal_ratio_min", True),
-    ]
-    means = {side: {} for side in sides}  # adjusted means
-    scores = {side: -1.0 for side in sides}
-    raws = {side: {} for side in sides}  # unadjusted means
-    individual_raws = {side: {sym: {} for sym in results} for side in sides}
-    individual_vals = {side: {sym: {} for sym in results} for side in sides}
-    individual_scores = {side: {sym: -1.0 for sym in results} for side in sides}
-    symbols_to_include = {side: [] for side in sides}
-    for side in sides:
-        for sym in results:
-            for key, mult in keys:
-                key_side = f"{key}_{side}"
-                if key_side not in results[sym]:
-                    results[sym][key_side] = results[sym][key]
-                individual_raws[side][sym][key] = results[sym][key_side]
-                if (max_key := f"maximum_{key}_{side}") in config:
-                    if config[max_key] >= 0.0:
-                        val = max(config[max_key], results[sym][key_side])
-                    else:
-                        val = 1.0
-                elif (min_key := f"minimum_{key}_{side}") in config:
-                    if config[min_key] >= 0.0:
-                        val = min(config[min_key], results[sym][key_side])
-                    else:
-                        val = 1.0
-                else:
-                    val = results[sym][key_side]
-                individual_vals[side][sym][key] = val
-                if mult:
-                    individual_scores[side][sym] *= val
-                else:
-                    individual_scores[side][sym] /= val
-        raws[side] = {
-            key: np.mean([individual_raws[side][sym][key] for sym in results]) for key, _ in keys
-        }
-        symbols_to_include[side] = sorted(
-            individual_scores[side], key=lambda x: individual_scores[side][x]
-        )[: max(1, int(len(individual_scores[side]) * (1 - config["clip_threshold"])))]
-        # print(symbols_to_include, individual_scores[side], config["clip_threshold"])
-        means[side] = {
-            key: np.mean([individual_vals[side][sym][key] for sym in symbols_to_include[side]])
-            for key, _ in keys
-        }
-        for key, mult in keys:
-            if mult:
-                scores[side] *= means[side][key]
-            else:
-                scores[side] /= means[side][key]
-    return {
-        "scores": scores,
-        "means": means,
-        "raws": raws,
-        "individual_scores": individual_scores,
-        "keys": keys,
-        "symbols_to_include": symbols_to_include,
-    }
-
-
 def configs_are_equal(cfg0, cfg1) -> bool:
     try:
         cfg0 = candidate_to_live_config(cfg0)
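For context on the new ("exposure_ratios_mean", False) entry: following the pattern of the removed calc_scores_old, higher-is-better metrics multiply the score and lower-is-better metrics divide it, starting from -1.0 so that more negative is better. A toy sketch with made-up numbers showing why the added key rewards lower average exposure:

    def combine_score(means: dict, keys: list) -> float:
        # same fold as calc_scores_old: multiply if higher is better, else divide
        score = -1.0
        for key, higher_is_better in keys:
            score = score * means[key] if higher_is_better else score / means[key]
        return score

    keys = [("adg_weighted_per_exposure", True), ("exposure_ratios_mean", False)]
    print(combine_score({"adg_weighted_per_exposure": 0.002, "exposure_ratios_mean": 0.10}, keys))  # -0.02
    print(combine_score({"adg_weighted_per_exposure": 0.002, "exposure_ratios_mean": 0.05}, keys))  # -0.04, better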