diff --git a/examples/FinRL_PortfolioOptimizationEnv_Demo.ipynb b/examples/FinRL_PortfolioOptimizationEnv_Demo.ipynb new file mode 100644 index 000000000..23bf077c6 --- /dev/null +++ b/examples/FinRL_PortfolioOptimizationEnv_Demo.ipynb @@ -0,0 +1,2465 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3xt6fIDownZs" + }, + "source": [ + "# A guide to the Portfolio Optimization Environment\n", + "\n", + "This notebook aims to provide an example of using PortfolioOptimizationEnv (or POE) to train a reinforcement learning model that learns to solve the portfolio optimization problem.\n", + "\n", + "In this document, we will reproduce a famous architecture called EIIE (ensemble of identical independent evaluators), introduced in the following paper:\n", + "\n", + "- Zhengyao Jiang, Dixing Xu, & Jinjun Liang. (2017). A Deep Reinforcement Learning Framework for the Financial Portfolio Management Problem. https://doi.org/10.48550/arXiv.1706.10059.\n", + "\n", + "It's advisable to read it to understand the algorithm implemented in this notebook.\n", + "\n", + "### Note\n", + "If you're using this environment, consider citing the following paper (in addition to FinRL references):\n", + "\n", + "- Caio Costa, & Anna Costa (2023). POE: A General Portfolio Optimization Environment for FinRL. In *Anais do II Brazilian Workshop on Artificial Intelligence in Finance* (pp. 132–143). SBC. 
https://doi.org/10.5753/bwaif.2023.231144.\n", + "\n", + "```\n", + "@inproceedings{bwaif,\n", + " author = {Caio Costa and Anna Costa},\n", + " title = {POE: A General Portfolio Optimization Environment for FinRL},\n", + " booktitle = {Anais do II Brazilian Workshop on Artificial Intelligence in Finance},\n", + " location = {João Pessoa/PB},\n", + " year = {2023},\n", + " keywords = {},\n", + " issn = {0000-0000},\n", + " pages = {132--143},\n", + " publisher = {SBC},\n", + " address = {Porto Alegre, RS, Brasil},\n", + " doi = {10.5753/bwaif.2023.231144},\n", + " url = {https://sol.sbc.org.br/index.php/bwaif/article/view/24959}\n", + "}\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0L7FZeWMUHp" + }, + "source": [ + "## Installation and imports\n", + "\n", + "To run this notebook in Google Colab, uncomment the commands in the cells below." + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XGHfTt1HMVQw", + "outputId": "e5226807-a740-4f22-a279-f466886518ba" + }, + "outputs": [], + "source": [ + "## install finrl library\n", + "# !sudo apt install swig\n", + "# !pip install git+https://github.com/AI4Finance-Foundation/FinRL.git" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-GLganWiMYZ1", + "outputId": "b3a7f99c-55dd-4274-c1ce-ab3a8111929a" + }, + "outputs": [], + "source": [ + "## We also need to install quantstats, because the environment uses it to plot graphs\n", + "# !pip install quantstats" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "id": "6RqrzokqoanP" + }, + "outputs": [], + "source": [ + "## Hide matplotlib warnings\n", + "# import warnings\n", + "# warnings.filterwarnings('ignore')\n", + "\n", + "import logging\n", + "logging.getLogger('matplotlib.font_manager').disabled = True" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "id": "Cz8DLleGz_TF" + }, + "source": [ + "#### Import the necessary code libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cP5t6U7-nYoc", + "outputId": "fd138d3e-222a-4ec5-e008-03a28b89dae9" + }, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "import numpy as np\n", + "\n", + "from finrl.meta.preprocessor.yahoodownloader import YahooDownloader\n", + "from finrl.meta.env_portfolio_optimization.env_portfolio_optimization import PortfolioOptimizationEnv\n", + "from finrl.agents.portfolio_optimization.models import DRLAgent\n", + "from finrl.agents.portfolio_optimization.architectures import EIIE\n", + "\n", + "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TY2yhvpASEyo" + }, + "source": [ + "## Fetch data\n", + "\n", + "In his paper, *Jiang et al* creates a portfolio composed by the top-11 cryptocurrencies based on 30-days volume. 
Since it's not specified when this classification was done, it's difficult to reproduce, so we will use a similar approach in the Brazilian stock market:\n", + "\n", + "- We select the top-10 stocks from the Brazilian stock market;\n", + "- For simplicity, we disregard stocks that have missing data for any day in the period 2011-01-01 to 2019-12-31 (9 years);" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "H11UjCstSFwm", + "outputId": "3d27b983-d1e0-41af-b20a-421be40e469f" + }, + "outputs": [], + "source": [ + "TOP_BRL = [\n", + " \"VALE3.SA\", \"PETR4.SA\", \"ITUB4.SA\", \"BBDC4.SA\",\n", + " \"BBAS3.SA\", \"RENT3.SA\", \"LREN3.SA\", \"PRIO3.SA\",\n", + " \"WEGE3.SA\", \"ABEV3.SA\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 623 + }, + "id": "Bkm96aNsSIji", + "outputId": "e3a20095-841e-4c89-c08e-24b9575cfb02" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "[*********************100%%**********************] 1 of 1 completed\n", + "Shape of DataFrame: (22330, 8)\n" + ] + }, + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeticday
02011-01-038.6323118.7282038.6303135.265023576145ABEV3.SA0
12011-01-0331.50000031.79999931.37999913.5659233313400BBAS3.SA0
22011-01-0311.80976311.92736211.7242376.70865010862336BBDC4.SA0
32011-01-0318.03155518.25011817.96325310.44630310014663ITUB4.SA0
42011-01-039.2649649.4928989.2649647.0489403320493LREN3.SA0
...........................
223252019-12-3030.54999930.70999930.15000011.10735822111600PETR4.SA0
223262019-12-306.7800006.8320006.5700006.6013978933500PRIO3.SA0
223272019-12-3047.95999948.29000147.29999944.4697462701600RENT3.SA0
223282019-12-3053.65000253.86000153.20000137.32098011928100VALE3.SA0
223292019-12-3017.70000117.74000017.33000016.4313145838200WEGE3.SA0
\n", + "

22330 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " date open high ... volume tic day\n", + "0 2011-01-03 8.632311 8.728203 ... 576145 ABEV3.SA 0\n", + "1 2011-01-03 31.500000 31.799999 ... 3313400 BBAS3.SA 0\n", + "2 2011-01-03 11.809763 11.927362 ... 10862336 BBDC4.SA 0\n", + "3 2011-01-03 18.031555 18.250118 ... 10014663 ITUB4.SA 0\n", + "4 2011-01-03 9.264964 9.492898 ... 3320493 LREN3.SA 0\n", + "... ... ... ... ... ... ... ...\n", + "22325 2019-12-30 30.549999 30.709999 ... 22111600 PETR4.SA 0\n", + "22326 2019-12-30 6.780000 6.832000 ... 8933500 PRIO3.SA 0\n", + "22327 2019-12-30 47.959999 48.290001 ... 2701600 RENT3.SA 0\n", + "22328 2019-12-30 53.650002 53.860001 ... 11928100 VALE3.SA 0\n", + "22329 2019-12-30 17.700001 17.740000 ... 5838200 WEGE3.SA 0\n", + "\n", + "[22330 rows x 8 columns]" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(len(TOP_BRL))\n", + "\n", + "portfolio_raw_df = YahooDownloader(start_date = '2011-01-01',\n", + " end_date = '2019-12-31',\n", + " ticker_list = TOP_BRL).fetch_data()\n", + "portfolio_raw_df" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 444 + }, + "id": "2UqpIXsuSKfO", + "outputId": "436605d5-bc9e-4038-e3d7-7bdf140033d8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeday
tic
ABEV3.SA2233223322332233223322332233
BBAS3.SA2233223322332233223322332233
BBDC4.SA2233223322332233223322332233
ITUB4.SA2233223322332233223322332233
LREN3.SA2233223322332233223322332233
PETR4.SA2233223322332233223322332233
PRIO3.SA2233223322332233223322332233
RENT3.SA2233223322332233223322332233
VALE3.SA2233223322332233223322332233
WEGE3.SA2233223322332233223322332233
\n", + "
" + ], + "text/plain": [ + " date open high low close volume day\n", + "tic \n", + "ABEV3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "BBAS3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "BBDC4.SA 2233 2233 2233 2233 2233 2233 2233\n", + "ITUB4.SA 2233 2233 2233 2233 2233 2233 2233\n", + "LREN3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "PETR4.SA 2233 2233 2233 2233 2233 2233 2233\n", + "PRIO3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "RENT3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "VALE3.SA 2233 2233 2233 2233 2233 2233 2233\n", + "WEGE3.SA 2233 2233 2233 2233 2233 2233 2233" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "portfolio_raw_df.groupby(\"tic\").count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pM829994GWo3" + }, + "source": [ + "### Instantiate Environment\n", + "\n", + "Using the `PortfolioOptimizationEnv`, it's easy to instantiate a portfolio optimization environment for reinforcement learning agents. In the example below, we use the dataframe created before to start an environment." + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalizing ['close', 'high', 'low'] by previous time...\n" + ] + } + ], + "source": [ + "df_portfolio = portfolio_raw_df[[\"date\", \"tic\", \"close\", \"high\", \"low\"]]\n", + "\n", + "environment = PortfolioOptimizationEnv(\n", + " df_portfolio,\n", + " initial_amount=100000,\n", + " comission_fee_pct=0.0025,\n", + " time_window=50,\n", + " features=[\"close\", \"high\", \"low\"]\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Instantiate Model\n", + "\n", + "Now, we can instantiate the model using FinRL API. In this example, we are going to use the EIIE architecture introduced by Jiang et. 
al.\n", + "\n", + ":exclamation: **Note:** Remember to set the architecture's `time_window` parameter with the same value of the environment's `time_window`." + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "750b2ea28d2a439db3fc5034927dbce2", + "c172e120fc5e4f9ab13bf8599d868b5f", + "4b2aa7128c5d4d15bb794eb76faccd6a", + "317393fb13c0449abfff29a4949553a0", + "8cb75a82e5374c51b1f47a6e15783177", + "9cb3d937be5d4f7cac192b392218ef37", + "b27b9cc333ac44a5bb2cec60d02f16c0", + "6a1187acb99d44c68e27cd5aad879ff1", + "6a5c9dbaddc441d390d4827c170cbe9c", + "1f84695a1caf4c80b29eb5eea90bb29a", + "a7a6884bfdb642b9b342f7cda49d7d67" + ] + }, + "id": "wr82W3E0uQSo", + "outputId": "61dcf1f5-7cf0-40b2-85bd-3f7dd943ddc6", + "scrolled": true + }, + "outputs": [], + "source": [ + "# set PolicyGradient parameters\n", + "model_kwargs = {\n", + " \"lr\": 0.01,\n", + " \"policy\": EIIE,\n", + "}\n", + "\n", + "# here, we can set EIIE's parameters\n", + "policy_kwargs = {\n", + " \"k_size\": 4,\n", + " \"time_window\": 50,\n", + " \"device\": device\n", + "}\n", + "\n", + "model = DRLAgent(environment).get_model(\"pg\", model_kwargs, policy_kwargs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train Model" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/20 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeday
tic
ABEV3.SA248248248248248248248
BBAS3.SA248248248248248248248
BBDC4.SA248248248248248248248
ITUB4.SA248248248248248248248
LREN3.SA248248248248248248248
PETR4.SA248248248248248248248
PRIO3.SA248248248248248248248
RENT3.SA248248248248248248248
VALE3.SA248248248248248248248
WEGE3.SA248248248248248248248
\n", + "" + ], + "text/plain": [ + " date open high low close volume day\n", + "tic \n", + "ABEV3.SA 248 248 248 248 248 248 248\n", + "BBAS3.SA 248 248 248 248 248 248 248\n", + "BBDC4.SA 248 248 248 248 248 248 248\n", + "ITUB4.SA 248 248 248 248 248 248 248\n", + "LREN3.SA 248 248 248 248 248 248 248\n", + "PETR4.SA 248 248 248 248 248 248 248\n", + "PRIO3.SA 248 248 248 248 248 248 248\n", + "RENT3.SA 248 248 248 248 248 248 248\n", + "VALE3.SA 248 248 248 248 248 248 248\n", + "WEGE3.SA 248 248 248 248 248 248 248" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "portfolio_2020_raw_df.groupby(\"tic\").count()" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": { + "id": "xclUdAcr8-Nv" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeday
tic
ABEV3.SA247247247247247247247
BBAS3.SA247247247247247247247
BBDC4.SA247247247247247247247
ITUB4.SA247247247247247247247
LREN3.SA247247247247247247247
PETR4.SA247247247247247247247
PRIO3.SA247247247247247247247
RENT3.SA247247247247247247247
VALE3.SA247247247247247247247
WEGE3.SA247247247247247247247
\n", + "
" + ], + "text/plain": [ + " date open high low close volume day\n", + "tic \n", + "ABEV3.SA 247 247 247 247 247 247 247\n", + "BBAS3.SA 247 247 247 247 247 247 247\n", + "BBDC4.SA 247 247 247 247 247 247 247\n", + "ITUB4.SA 247 247 247 247 247 247 247\n", + "LREN3.SA 247 247 247 247 247 247 247\n", + "PETR4.SA 247 247 247 247 247 247 247\n", + "PRIO3.SA 247 247 247 247 247 247 247\n", + "RENT3.SA 247 247 247 247 247 247 247\n", + "VALE3.SA 247 247 247 247 247 247 247\n", + "WEGE3.SA 247 247 247 247 247 247 247" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "portfolio_2021_raw_df.groupby(\"tic\").count()" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": { + "id": "Lkl9XcGU8_5i" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dateopenhighlowclosevolumeday
tic
ABEV3.SA250250250250250250250
BBAS3.SA250250250250250250250
BBDC4.SA250250250250250250250
ITUB4.SA250250250250250250250
LREN3.SA250250250250250250250
PETR4.SA250250250250250250250
PRIO3.SA250250250250250250250
RENT3.SA250250250250250250250
VALE3.SA250250250250250250250
WEGE3.SA250250250250250250250
\n", + "
" + ], + "text/plain": [ + " date open high low close volume day\n", + "tic \n", + "ABEV3.SA 250 250 250 250 250 250 250\n", + "BBAS3.SA 250 250 250 250 250 250 250\n", + "BBDC4.SA 250 250 250 250 250 250 250\n", + "ITUB4.SA 250 250 250 250 250 250 250\n", + "LREN3.SA 250 250 250 250 250 250 250\n", + "PETR4.SA 250 250 250 250 250 250 250\n", + "PRIO3.SA 250 250 250 250 250 250 250\n", + "RENT3.SA 250 250 250 250 250 250 250\n", + "VALE3.SA 250 250 250 250 250 250 250\n", + "WEGE3.SA 250 250 250 250 250 250 250" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "portfolio_2022_raw_df.groupby(\"tic\").count()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IFYB9iGwAPSh" + }, + "source": [ + "### Instantiate different environments\n", + "\n", + "Since we have three different periods of time, we need three different environments instantiated to simulate them." + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "id": "HhsL5Cxx9d5s" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalizing ['close', 'high', 'low'] by previous time...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalizing ['close', 'high', 'low'] by previous time...\n", + "Normalizing ['close', 'high', 'low'] by previous time...\n" + ] + } + ], + "source": [ + "df_portfolio_2020 = portfolio_2020_raw_df[[\"date\", \"tic\", \"close\", \"high\", \"low\"]]\n", + "df_portfolio_2021 = portfolio_2021_raw_df[[\"date\", \"tic\", \"close\", \"high\", \"low\"]]\n", + "df_portfolio_2022 = portfolio_2022_raw_df[[\"date\", \"tic\", \"close\", \"high\", \"low\"]]\n", + "\n", + "environment_2020 = PortfolioOptimizationEnv(\n", + " df_portfolio_2020,\n", + " initial_amount=100000,\n", + " comission_fee_pct=0.0025,\n", + " time_window=50,\n", + " features=[\"close\", \"high\", \"low\"]\n", + ")\n", + "\n", + "environment_2021 = 
PortfolioOptimizationEnv(\n", + " df_portfolio_2021,\n", + " initial_amount=100000,\n", + " comission_fee_pct=0.0025,\n", + " time_window=50,\n", + " features=[\"close\", \"high\", \"low\"]\n", + ")\n", + "\n", + "environment_2022 = PortfolioOptimizationEnv(\n", + " df_portfolio_2022,\n", + " initial_amount=100000,\n", + " comission_fee_pct=0.0025,\n", + " time_window=50,\n", + " features=[\"close\", \"high\", \"low\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y4RuS2pRAa4H" + }, + "source": [ + "### Test EIIE architecture\n", + "Now, we can test the EIIE architecture in the three different test periods. It's important to note that, in this code, we load the saved policy, even though it's not necessary, just to show how to save and load your model." + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": { + "id": "JeRy__TI9CAs" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 104272.4921875\n", + "Final accumulative portfolio value: 1.042724921875\n", + "Maximum DrawDown: -0.3134186860319077\n", + "Sharpe ratio: 0.36180776300706646\n", + "=================================\n", + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 42020.9765625\n", + "Final accumulative portfolio value: 0.420209765625\n", + "Maximum DrawDown: -0.5931160156249999\n", + "Sharpe ratio: -3.141339365788307\n", + "=================================\n", + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 52142.08203125\n", + "Final accumulative portfolio value: 0.5214208203125\n", + "Maximum DrawDown: -0.5175579482110072\n", + "Sharpe ratio: -2.195587992293611\n", + "=================================\n" + ] + } + ], + "source": [ + "EIIE_results = {\n", + " \"training\": 
environment._asset_memory[\"final\"],\n", + " \"2020\": {},\n", + " \"2021\": {},\n", + " \"2022\": {}\n", + "}\n", + "\n", + "# instantiate an architecture with the same arguments used in training\n", + "# and load with load_state_dict.\n", + "policy = EIIE(k_size= 4, time_window= 50, device=device)\n", + "policy.load_state_dict(torch.load(\"policy_EIIE.pt\"))\n", + "\n", + "# 2020\n", + "DRLAgent.DRL_validation(model, environment_2020, policy=policy)\n", + "EIIE_results[\"2020\"][\"value\"] = environment_2020._asset_memory[\"final\"]\n", + "\n", + "# 2021\n", + "DRLAgent.DRL_validation(model, environment_2021, policy=policy)\n", + "EIIE_results[\"2021\"][\"value\"] = environment_2021._asset_memory[\"final\"]\n", + "\n", + "# 2022\n", + "DRLAgent.DRL_validation(model, environment_2022, policy=policy)\n", + "EIIE_results[\"2022\"][\"value\"] = environment_2022._asset_memory[\"final\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LZc5PpbaBU-J" + }, + "source": [ + "### Test Uniform Buy and Hold\n", + "For comparison, we will also test the performance of a uniform buy and hold strategy. In this strategy, the portfolio has no remaining cash and the same percentage of money is allocated in each asset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": { + "id": "ntHO_UIs-83T" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 403056.28125\n", + "Final accumulative portfolio value: 4.0305628125\n", + "Maximum DrawDown: -0.47875244091762803\n", + "Sharpe ratio: 0.7853090877067095\n", + "=================================\n", + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 171126.8125\n", + "Final accumulative portfolio value: 1.711268125\n", + "Maximum DrawDown: -0.250801953125\n", + "Sharpe ratio: 1.712443490118881\n", + "=================================\n", + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 95723.921875\n", + "Final accumulative portfolio value: 0.95723921875\n", + "Maximum DrawDown: -0.17293185561981794\n", + "Sharpe ratio: -0.1558444284474649\n", + "=================================\n", + "=================================\n", + "Initial portfolio value:100000\n", + "Final portfolio value: 114157.5\n", + "Final accumulative portfolio value: 1.141575\n", + "Maximum DrawDown: -0.16239865532322129\n", + "Sharpe ratio: 0.8449068899613046\n", + "=================================\n" + ] + } + ], + "source": [ + "UBAH_results = {\n", + " \"train\": {},\n", + " \"2020\": {},\n", + " \"2021\": {},\n", + " \"2022\": {}\n", + "}\n", + "\n", + "PORTFOLIO_SIZE = len(TOP_BRL)\n", + "\n", + "# train period\n", + "terminated = False\n", + "environment.reset()\n", + "while not terminated:\n", + " action = [0] + [1/PORTFOLIO_SIZE] * PORTFOLIO_SIZE\n", + " _, _, terminated, _ = environment.step(action)\n", + "UBAH_results[\"train\"][\"value\"] = environment._asset_memory[\"final\"]\n", + "\n", + "# 2020\n", + "terminated = False\n", + "environment_2020.reset()\n", + "while not terminated:\n", + 
" action = [0] + [1/PORTFOLIO_SIZE] * PORTFOLIO_SIZE\n", + " _, _, terminated, _ = environment_2020.step(action)\n", + "UBAH_results[\"2020\"][\"value\"] = environment_2020._asset_memory[\"final\"]\n", + "\n", + "# 2021\n", + "terminated = False\n", + "environment_2021.reset()\n", + "while not terminated:\n", + " action = [0] + [1/PORTFOLIO_SIZE] * PORTFOLIO_SIZE\n", + " _, _, terminated, _ = environment_2021.step(action)\n", + "UBAH_results[\"2021\"][\"value\"] = environment_2021._asset_memory[\"final\"]\n", + "\n", + "# 2022\n", + "terminated = False\n", + "environment_2022.reset()\n", + "while not terminated:\n", + " action = [0] + [1/PORTFOLIO_SIZE] * PORTFOLIO_SIZE\n", + " _, _, terminated, _ = environment_2022.step(action)\n", + "UBAH_results[\"2022\"][\"value\"] = environment_2022._asset_memory[\"final\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kBMM7hAHC6rq" + }, + "source": [ + "### Plot graphics" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": { + "id": "n8YrDNpeC71w" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline \n", + "\n", + "plt.plot(UBAH_results[\"train\"][\"value\"], label=\"Buy and Hold\")\n", + "plt.plot(EIIE_results[\"training\"], label=\"EIIE\")\n", + "\n", + "plt.xlabel(\"Days\")\n", + "plt.ylabel(\"Portfolio Value\")\n", + "plt.title(\"Performance in training period\")\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": { + "id": "dQniascoDIH2" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(UBAH_results[\"2020\"][\"value\"], label=\"Buy and Hold\")\n", + "plt.plot(EIIE_results[\"2020\"][\"value\"], label=\"EIIE\")\n", + "\n", + "plt.xlabel(\"Days\")\n", + "plt.ylabel(\"Portfolio Value\")\n", + "plt.title(\"Performance in 2020\")\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": { + "id": "1hJtnW7QDIt2" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(UBAH_results[\"2021\"][\"value\"], label=\"Buy and Hold\")\n", + "plt.plot(EIIE_results[\"2021\"][\"value\"], label=\"EIIE\")\n", + "\n", + "plt.xlabel(\"Days\")\n", + "plt.ylabel(\"Portfolio Value\")\n", + "plt.title(\"Performance in 2021\")\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": { + "id": "1hJD79w-DJXo" + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(UBAH_results[\"2022\"][\"value\"], label=\"Buy and Hold\")\n", + "plt.plot(EIIE_results[\"2022\"][\"value\"], label=\"EIIE\")\n", + "\n", + "plt.xlabel(\"Days\")\n", + "plt.ylabel(\"Portfolio Value\")\n", + "plt.title(\"Performance in 2022\")\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that the agent is clearly overfitting, since it achieves great results in training period but is mediocre in testing periods. This is a common problem in this policy gradient algorithm. To deal with this, you can change the hyperparameters." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "1f84695a1caf4c80b29eb5eea90bb29a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "317393fb13c0449abfff29a4949553a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1f84695a1caf4c80b29eb5eea90bb29a", + "placeholder": "​", + "style": "IPY_MODEL_a7a6884bfdb642b9b342f7cda49d7d67", + "value": " 10/250 [05:53<2:10:07, 32.53s/it]" + } + }, + "4b2aa7128c5d4d15bb794eb76faccd6a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "danger", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6a1187acb99d44c68e27cd5aad879ff1", + "max": 250, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6a5c9dbaddc441d390d4827c170cbe9c", + "value": 10 + } + }, + "6a1187acb99d44c68e27cd5aad879ff1": { + "model_module": "@jupyter-widgets/base", 
+ "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6a5c9dbaddc441d390d4827c170cbe9c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "750b2ea28d2a439db3fc5034927dbce2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c172e120fc5e4f9ab13bf8599d868b5f", + "IPY_MODEL_4b2aa7128c5d4d15bb794eb76faccd6a", + "IPY_MODEL_317393fb13c0449abfff29a4949553a0" + ], + "layout": "IPY_MODEL_8cb75a82e5374c51b1f47a6e15783177" + } + }, + "8cb75a82e5374c51b1f47a6e15783177": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9cb3d937be5d4f7cac192b392218ef37": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a7a6884bfdb642b9b342f7cda49d7d67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b27b9cc333ac44a5bb2cec60d02f16c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c172e120fc5e4f9ab13bf8599d868b5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9cb3d937be5d4f7cac192b392218ef37", + "placeholder": "​", + "style": "IPY_MODEL_b27b9cc333ac44a5bb2cec60d02f16c0", + "value": " 4%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/finrl/agents/portfolio_optimization/README.md b/finrl/agents/portfolio_optimization/README.md new file mode 100644 index 000000000..720f9e187 --- /dev/null +++ b/finrl/agents/portfolio_optimization/README.md @@ -0,0 +1,87 @@ +# Portfolio Optimization Agents + +This directory contains architectures and algorithms commonly used in portfolio optimization agents. + +To instantiate the model, it's necessary to have an instance of [PortfolioOptimizationEnv](/finrl/meta/env_portfolio_optimization/). In the example below, we use the `DRLAgent` class to instantiate a policy gradient ("pg") model. With the dictionary `model_kwargs`, we can set the `PolicyGradient` class parameters and, whith the dictionary `policy_kwargs`, it's possible to change the parameters of the chosen architecture. + +```python +from finrl.agents.portfolio_optimization.models import DRLAgent +from finrl.agents.portfolio_optimization.architectures import EIIE + +# set PolicyGradient algorithm arguments +model_kwargs = { + "lr": 0.01, + "policy": EIIE, +} + +# set EIIE architecture arguments +policy_kwargs = { + "k_size": 4 +} + +model = DRLAgent(train_env).get_model("pg", model_kwargs, policy_kwargs) +``` + +In the example below, the model is trained in 5 episodes (we define an episode as a complete period of the used environment). + +```python +DRLAgent.train_model(model, episodes=5) +``` + +It's important that the architecture and the environment have the same `time_window` defined. 
By default, both of them use 50 timesteps as `time_window`. For more details about what a time window is, check this [article](https://doi.org/10.5753/bwaif.2023.231144). + +### Policy Gradient Algorithm + +The class `PolicyGradient` implements the Policy Gradient algorithm used in the *Jiang et al.* paper. This algorithm is inspired by DDPG (deep deterministic policy gradient), but there are a couple of differences: +- DDPG is an actor-critic algorithm, so it has an actor and a critic neural network. The algorithm below, however, doesn't have a critic neural network and uses the portfolio value as value function: the policy will be updated to maximize the portfolio value. +- DDPG usually makes use of a noise parameter in the action during training to create an exploratory behavior. The PG algorithm, on the other hand, has a full-exploit approach. +- DDPG randomly samples experiences from its replay buffer. The implemented policy gradient, however, samples a sequential batch of experiences in time, to make it possible to calculate the variation of the portfolio value in the batch and use it as value function. + +The algorithm was implemented as follows: +1. Initializes policy network and replay buffer; +2. For each episode, do the following: + 1. For each period of `batch_size` timesteps, do the following: + 1. For each timestep, define an action to be performed, simulate the timestep and save the experiences in the replay buffer. + 2. After `batch_size` timesteps are simulated, sample the replay buffer. + 4. Calculate the value function: $V = \sum\limits_{t=1}^{batch\_size} \ln(\mu_{t}(W_{t} \cdot P_{t}))$, where $W_{t}$ is the action performed at timestep t, $P_{t}$ is the price variation vector at timestep t and $\mu_{t}$ is the transaction remainder factor at timestep t. Check the *Jiang et al.* paper for more details. + 5. Perform gradient ascent in the policy network. + 2.
If, at the end of the episode, there is a sequence of remaining experiences in the replay buffer, perform steps 1 to 5 with the remaining experiences. + +### References + +If you are using one of them in your research, you can use the following references. + +#### EIIE Architecture and Policy Gradient algorithm + +[A Deep Reinforcement Learning Framework for the Financial Portfolio Management Problem](https://doi.org/10.48550/arXiv.1706.10059) +``` +@misc{jiang2017deep, + title={A Deep Reinforcement Learning Framework for the Financial Portfolio Management Problem}, + author={Zhengyao Jiang and Dixing Xu and Jinjun Liang}, + year={2017}, + eprint={1706.10059}, + archivePrefix={arXiv}, + primaryClass={q-fin.CP} +} +``` + +#### EI3 Architecture + +[A Multi-Scale Temporal Feature Aggregation Convolutional Neural Network for Portfolio Management](https://doi.org/10.1145/3357384.3357961) +``` +@inproceedings{shi2018multiscale, + author = {Shi, Si and Li, Jianjun and Li, Guohui and Pan, Peng}, + title = {A Multi-Scale Temporal Feature Aggregation Convolutional Neural Network for Portfolio Management}, + year = {2019}, + isbn = {9781450369763}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3357384.3357961}, + doi = {10.1145/3357384.3357961}, + booktitle = {Proceedings of the 28th ACM International Conference on Information and Knowledge Management}, + pages = {1613–1622}, + numpages = {10}, + keywords = {portfolio management, reinforcement learning, inception network, convolution neural network}, + location = {Beijing, China}, + series = {CIKM '19} } +``` diff --git a/finrl/agents/portfolio_optimization/__init__.py b/finrl/agents/portfolio_optimization/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/finrl/agents/portfolio_optimization/algorithms.py b/finrl/agents/portfolio_optimization/algorithms.py new file mode 100644 index 000000000..13841ba80 --- /dev/null +++
b/finrl/agents/portfolio_optimization/algorithms.py @@ -0,0 +1,251 @@ +from __future__ import annotations + +import copy + +import numpy as np +import torch +from torch.optim import AdamW +from torch.utils.data import DataLoader +from tqdm import tqdm + +from .architectures import EIIE +from .utils import PVM +from .utils import ReplayBuffer +from .utils import RLDataset + + +class PolicyGradient: + """Class implementing policy gradient algorithm to train portfolio + optimization agents. + + Note: + During testing, the agent is optimized through online learning. + The parameters of the policy is updated repeatedly after a constant + period of time. To disable it, set learning rate to 0. + + Attributes: + train_env: Environment used to train the agent + train_policy: Policy used in training. + test_env: Environment used to test the agent. + test_policy: Policy after test online learning. + """ + + def __init__( + self, + env, + policy=EIIE, + policy_kwargs=None, + validation_env=None, + batch_size=100, + lr=1e-3, + optimizer=AdamW, + device="cpu", + ): + """Initializes Policy Gradient for portfolio optimization. + + Args: + env: Training Environment. + policy: Policy architecture to be used. + policy_kwargs: Arguments to be used in the policy network. + validation_env: Validation environment. + batch_size: Batch size to train neural network. + lr: policy Neural network learning rate. + optimizer: Optimizer of neural network. + device: Device where neural network is run. + """ + self.policy = policy + self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs + self.validation_env = validation_env + self.batch_size = batch_size + self.lr = lr + self.optimizer = optimizer + self.device = device + self._setup_train(env, self.policy, self.batch_size, self.lr, self.optimizer) + + def _setup_train(self, env, policy, batch_size, lr, optimizer): + """Initializes algorithm before training. + + Args: + env: environment. + policy: Policy architecture to be used. 
+ batch_size: Batch size to train neural network. + lr: Policy neural network learning rate. + optimizer: Optimizer of neural network. + """ + # environment + self.train_env = env + + # neural networks + self.train_policy = policy(**self.policy_kwargs).to(self.device) + self.train_optimizer = optimizer(self.train_policy.parameters(), lr=lr) + + # replay buffer and portfolio vector memory + self.train_batch_size = batch_size + self.train_buffer = ReplayBuffer(capacity=batch_size) + self.train_pvm = PVM(self.train_env.episode_length, env.portfolio_size) + + # dataset and dataloader + dataset = RLDataset(self.train_buffer) + self.train_dataloader = DataLoader( + dataset=dataset, batch_size=batch_size, shuffle=False, pin_memory=True + ) + + def train(self, episodes=100): + """Training sequence. + + Args: + episodes: Number of episodes to simulate. + """ + for i in tqdm(range(1, episodes + 1)): + obs = self.train_env.reset() # observation + self.train_pvm.reset() # reset portfolio vector memory + done = False + + while not done: + # define last_action and action and update portfolio vector memory + last_action = self.train_pvm.retrieve() + obs_batch = np.expand_dims(obs, axis=0) + last_action_batch = np.expand_dims(last_action, axis=0) + action = self.train_policy(obs_batch, last_action_batch) + self.train_pvm.add(action) + + # run simulation step + next_obs, reward, done, info = self.train_env.step(action) + + # add experience to replay buffer + exp = (obs, last_action, info["price_variation"], info["trf_mu"]) + self.train_buffer.append(exp) + + # update policy networks + if len(self.train_buffer) == self.train_batch_size: + self._gradient_ascent() + + obs = next_obs + + # gradient ascent with episode remaining buffer data + self._gradient_ascent() + + # validation step + if self.validation_env: + self.test(self.validation_env) + + def _setup_test(self, env, policy, batch_size, lr, optimizer): + """Initializes algorithm before testing. + + Args: + env: Environment. 
+ policy: Policy architecture to be used. + batch_size: batch size to train neural network. + lr: policy neural network learning rate. + optimizer: Optimizer of neural network. + """ + # environment + self.test_env = env + + # process None arguments + policy = self.train_policy if policy is None else policy + lr = self.lr if lr is None else lr + optimizer = self.optimizer if optimizer is None else optimizer + + # neural networks + # define policy + self.test_policy = copy.deepcopy(policy) + self.test_optimizer = optimizer(self.test_policy.parameters(), lr=lr) + + # replay buffer and portfolio vector memory + self.test_buffer = ReplayBuffer(capacity=batch_size) + self.test_pvm = PVM(self.test_env.episode_length, env.portfolio_size) + + # dataset and dataloader + dataset = RLDataset(self.test_buffer) + self.test_dataloader = DataLoader( + dataset=dataset, batch_size=batch_size, shuffle=False, pin_memory=True + ) + + def test( + self, env, policy=None, online_training_period=10, lr=None, optimizer=None + ): + """Tests the policy with online learning. + + Args: + env: Environment to be used in testing. + policy: Policy architecture to be used. If None, it will use the training + architecture. + online_training_period: Period in which an online training will occur. To + disable online learning, use a very big value. + batch_size: Batch size to train neural network. If None, it will use the + training batch size. + lr: Policy neural network learning rate. If None, it will use the training + learning rate + optimizer: Optimizer of neural network. If None, it will use the training + optimizer + + Note: + To disable online learning, set learning rate to 0 or a very big online + training period. 
+ """ + self._setup_test(env, policy, online_training_period, lr, optimizer) + + obs = self.test_env.reset() # observation + self.test_pvm.reset() # reset portfolio vector memory + done = False + steps = 0 + + while not done: + steps += 1 + # define last_action and action and update portfolio vector memory + last_action = self.test_pvm.retrieve() + obs_batch = np.expand_dims(obs, axis=0) + last_action_batch = np.expand_dims(last_action, axis=0) + action = self.test_policy(obs_batch, last_action_batch) + self.test_pvm.add(action) + + # run simulation step + next_obs, reward, done, info = self.test_env.step(action) + + # add experience to replay buffer + exp = (obs, last_action, info["price_variation"], info["trf_mu"]) + self.test_buffer.append(exp) + + # update policy networks + if steps % online_training_period == 0: + self._gradient_ascent(test=True) + + obs = next_obs + + def _gradient_ascent(self, test=False): + """Performs the gradient ascent step in the policy gradient algorithm. + + Args: + test: If true, it uses the test dataloader and policy. 
+ """ + # get batch data from dataloader + obs, last_actions, price_variations, trf_mu = ( + next(iter(self.test_dataloader)) + if test + else next(iter(self.train_dataloader)) + ) + obs = obs.to(self.device) + last_actions = last_actions.to(self.device) + price_variations = price_variations.to(self.device) + trf_mu = trf_mu.unsqueeze(1).to(self.device) + + # define policy loss (negative for gradient ascent) + mu = ( + self.test_policy.mu(obs, last_actions) + if test + else self.train_policy.mu(obs, last_actions) + ) + policy_loss = -torch.mean( + torch.log(torch.sum(mu * price_variations * trf_mu, dim=1)) + ) + + # update policy network + if test: + self.test_policy.zero_grad() + policy_loss.backward() + self.test_optimizer.step() + else: + self.train_policy.zero_grad() + policy_loss.backward() + self.train_optimizer.step() diff --git a/finrl/agents/portfolio_optimization/architectures.py b/finrl/agents/portfolio_optimization/architectures.py new file mode 100644 index 000000000..571badc9c --- /dev/null +++ b/finrl/agents/portfolio_optimization/architectures.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import numpy as np +import torch +from torch import nn + + +class EIIE(nn.Module): + def __init__( + self, + initial_features=3, + k_size=3, + conv_mid_features=2, + conv_final_features=20, + time_window=50, + device="cpu", + ): + """EIIE (ensemble of identical independent evaluators) policy network + initializer. + + Args: + initial_features: Number of input features. + k_size: Size of first convolutional kernel. + conv_mid_features: Size of intermediate convolutional channels. + conv_final_features: Size of final convolutional channels. + time_window: Size of time window used as agent's state. + device: Device in which the neural network will be run. + + Note: + Reference article: https://doi.org/10.48550/arXiv.1706.10059. 
+ """ + super().__init__() + self.device = device + + n_size = time_window - k_size + 1 + + self.sequential = nn.Sequential( + nn.Conv2d( + in_channels=initial_features, + out_channels=conv_mid_features, + kernel_size=(1, k_size), + ), + nn.ReLU(), + nn.Conv2d( + in_channels=conv_mid_features, + out_channels=conv_final_features, + kernel_size=(1, n_size), + ), + nn.ReLU(), + ) + + self.final_convolution = nn.Conv2d( + in_channels=conv_final_features + 1, out_channels=1, kernel_size=(1, 1) + ) + + self.softmax = nn.Sequential(nn.Softmax(dim=-1)) + + def mu(self, observation, last_action): + """Defines a most favorable action of this policy given input x. + + Args: + observation: environment observation. + last_action: Last action performed by agent. + + Returns: + Most favorable action. + """ + + if isinstance(observation, np.ndarray): + observation = torch.from_numpy(observation).to(self.device) + if isinstance(last_action, np.ndarray): + last_action = torch.from_numpy(last_action).to(self.device) + + last_stocks, cash_bias = self._process_last_action(last_action) + cash_bias = torch.zeros_like(cash_bias).to(self.device) + + output = self.sequential(observation) # shape [N, 20, PORTFOLIO_SIZE, 1] + output = torch.cat( + [last_stocks, output], dim=1 + ) # shape [N, 21, PORTFOLIO_SIZE, 1] + output = self.final_convolution(output) # shape [N, 1, PORTFOLIO_SIZE, 1] + output = torch.cat( + [cash_bias, output], dim=2 + ) # shape [N, 1, PORTFOLIO_SIZE + 1, 1] + + # output shape must be [N, features] = [1, PORTFOLIO_SIZE + 1], being N batch size (1) + # and size the number of features (weights vector). + output = torch.squeeze(output, 3) + output = torch.squeeze(output, 1) # shape [N, PORTFOLIO_SIZE + 1] + + output = self.softmax(output) + + return output + + def forward(self, observation, last_action): + """Policy network's forward propagation. + + Args: + observation: Environment observation (dictionary). + last_action: Last action performed by the agent. 
+ + Returns: + Action to be taken (numpy array). + """ + mu = self.mu(observation, last_action) + action = mu.cpu().detach().numpy().squeeze() + return action + + def _process_last_action(self, last_action): + """Process the last action to retrieve cash bias and last stocks. + + Args: + last_action: Last performed action. + + Returns: + Last stocks and cash bias. + """ + batch_size = last_action.shape[0] + stocks = last_action.shape[1] - 1 + last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1)) + cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1)) + return last_stocks, cash_bias + + +class EI3(nn.Module): + def __init__( + self, + initial_features=3, + k_short=3, + k_medium=21, + conv_mid_features=3, + conv_final_features=20, + time_window=50, + device="cpu", + ): + """EI3 (ensemble of identical independent inception) policy network + initializer. + + Args: + initial_features: Number of input features. + k_short: Size of short convolutional kernel. + k_medium: Size of medium convolutional kernel. + conv_mid_features: Size of intermediate convolutional channels. + conv_final_features: Size of final convolutional channels. + time_window: Size of time window used as agent's state. + device: Device in which the neural network will be run. + + Reference: + Reference article: https://doi.org/10.1145/3357384.3357961. 
+ """ + super().__init__() + self.device = device + + n_short = time_window - k_short + 1 + n_medium = time_window - k_medium + 1 + n_long = time_window + + self.short_term = nn.Sequential( + nn.Conv2d( + in_channels=initial_features, + out_channels=conv_mid_features, + kernel_size=(1, k_short), + ), + nn.ReLU(), + nn.Conv2d( + in_channels=conv_mid_features, + out_channels=conv_final_features, + kernel_size=(1, n_short), + ), + nn.ReLU(), + ) + + self.mid_term = nn.Sequential( + nn.Conv2d( + in_channels=3, out_channels=conv_mid_features, kernel_size=(1, k_medium) + ), + nn.ReLU(), + nn.Conv2d( + in_channels=conv_mid_features, + out_channels=conv_final_features, + kernel_size=(1, n_medium), + ), + nn.ReLU(), + ) + + self.long_term = nn.Sequential(nn.MaxPool2d(kernel_size=(1, n_long)), nn.ReLU()) + + self.final_convolution = nn.Conv2d( + in_channels=2 * conv_final_features + initial_features + 1, + out_channels=1, + kernel_size=(1, 1), + ) + + self.softmax = nn.Sequential(nn.Softmax(dim=-1)) + + def mu(self, observation, last_action): + """Defines a most favorable action of this policy given input x. + + Args: + observation: environment observation. + last_action: Last action performed by agent. + + Returns: + Most favorable action. 
+ """ + + if isinstance(observation, np.ndarray): + observation = torch.from_numpy(observation).to(self.device) + if isinstance(last_action, np.ndarray): + last_action = torch.from_numpy(last_action).to(self.device) + + last_stocks, cash_bias = self._process_last_action(last_action) + cash_bias = torch.zeros_like(cash_bias).to(self.device) + + short_features = self.short_term(observation.float()) + medium_features = self.mid_term(observation.float()) + long_features = self.long_term(observation.float()) + + features = torch.cat( + [last_stocks, short_features, medium_features, long_features], dim=1 + ) + output = self.final_convolution(features) + output = torch.cat([cash_bias, output], dim=2) + + # output shape must be [N, features] = [1, PORTFOLIO_SIZE + 1], being N batch size (1) + # and size the number of features (weights vector). + output = torch.squeeze(output, 3) + output = torch.squeeze(output, 1) # shape [N, PORTFOLIO_SIZE + 1] + + output = self.softmax(output) + + return output + + def forward(self, observation, last_action): + """Policy network's forward propagation. + + Args: + observation: Environment observation (dictionary). + last_action: Last action performed by the agent. + + Returns: + Action to be taken (numpy array). + """ + mu = self.mu(observation, last_action) + action = mu.cpu().detach().numpy().squeeze() + return action + + def _process_last_action(self, last_action): + """Process the last action to retrieve cash bias and last stocks. + + Args: + last_action: Last performed action. + + Returns: + Last stocks and cash bias. 
+ """ + batch_size = last_action.shape[0] + stocks = last_action.shape[1] - 1 + last_stocks = last_action[:, 1:].reshape((batch_size, 1, stocks, 1)) + cash_bias = last_action[:, 0].reshape((batch_size, 1, 1, 1)) + return last_stocks, cash_bias diff --git a/finrl/agents/portfolio_optimization/models.py b/finrl/agents/portfolio_optimization/models.py new file mode 100644 index 000000000..bfaa537fa --- /dev/null +++ b/finrl/agents/portfolio_optimization/models.py @@ -0,0 +1,57 @@ +""" +DRL models to solve the portfolio optimization task with reinforcement learning. +This agent was developed to work with environments like PortfolioOptimizationEnv. +""" +from __future__ import annotations + +from .algorithms import PolicyGradient + +MODELS = {"pg": PolicyGradient} + + +class DRLAgent: + """ + Implementation for DRL algorithms for portfolio optimization. + + Attributes + ---------- + env: gym environment class + user-defined class + Methods + ------- + get_model() + setup DRL algorithms + train_model() + train DRL algorithms in a train dataset + and output the trained model + DRL_prediction() + make a prediction in a test dataset and get results + """ + + def __init__(self, env): + self.env = env + + def get_model(self, model_name, model_kwargs=None, policy_kwargs=None): + if model_name not in MODELS: + raise NotImplementedError("The model requested was not implemented.") + + model = MODELS[model_name] + model_kwargs = {} if model_kwargs is None else model_kwargs + if policy_kwargs is not None: + model_kwargs["policy_kwargs"] = policy_kwargs + return model(self.env, **model_kwargs) + + @staticmethod + def train_model(model, episodes=100): + model.train(episodes) + + @staticmethod + def DRL_validation( + model, + test_env, + policy=None, + online_training_period=10, + learning_rate=None, + optimizer=None, + ): + model.test(test_env, policy, online_training_period, learning_rate, optimizer) diff --git a/finrl/agents/portfolio_optimization/utils.py 
class PVM:
    """Portfolio vector memory: a fixed-size store of past actions."""

    def __init__(self, capacity, portfolio_size):
        """Initializes portfolio vector memory.

        Args:
            capacity: Max capacity of memory.
            portfolio_size: Portfolio size.
        """
        # initially, memory will have the same actions
        self.capacity = capacity
        self.portfolio_size = portfolio_size
        self.reset()

    def reset(self):
        """Fills the memory with the all-cash action and rewinds the index."""
        initial_action = np.array(
            [1] + [0] * self.portfolio_size, dtype=np.float32
        )
        # Every slot gets its own array: sharing a single object across slots
        # would let an in-place change to one retrieved action leak into all
        # the others.
        self.memory = [initial_action.copy() for _ in range(self.capacity + 1)]
        self.index = 0  # initial index to retrieve data

    def retrieve(self):
        """Returns the action at the current index and advances the index.

        The index wraps around to 0 after reaching ``capacity``.

        Returns:
            Action stored at the index held before the call.
        """
        last_action = self.memory[self.index]
        self.index = 0 if self.index == self.capacity else self.index + 1
        return last_action

    def add(self, action):
        """Stores an action at the current index (the index is not moved).

        Args:
            action: Action to be saved.
        """
        self.memory[self.index] = action


class ReplayBuffer:
    """Bounded FIFO buffer of experiences."""

    def __init__(self, capacity):
        """Initializes replay buffer.

        Args:
            capacity: Max capacity of buffer.
        """
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        """Represents the size of the buffer.

        Returns:
            Size of the buffer.
        """
        return len(self.buffer)

    def append(self, experience):
        """Append experience to buffer. When buffer is full, it pops
        an old experience.

        Args:
            experience: experience to be saved.
        """
        self.buffer.append(experience)

    def sample(self):
        """Sample from replay buffer. All data from replay buffer is
        returned and the buffer is cleared.

        Returns:
            List with every experience that was stored, oldest first.
        """
        buffer = list(self.buffer)
        self.buffer.clear()
        return buffer


class RLDataset(IterableDataset):
    """Iterable dataset that drains a replay buffer."""

    def __init__(self, buffer):
        """Initializes reinforcement learning dataset.

        Args:
            buffer: replay buffer to become iterable dataset.

        Note:
            It's a subclass of pytorch's IterableDataset,
            check https://pytorch.org/docs/stable/data.html#torch.utils.data.IterableDataset
        """
        self.buffer = buffer

    def __iter__(self):
        """Iterates over RLDataset.

        Note:
            Iterating calls ``sample()`` on the buffer, which empties it.

        Returns:
            Every experience of a sample from replay buffer.
        """
        yield from self.buffer.sample()
The dataframe contains the time series of +features defined by the user (such as closing, high and low prices) and must have +a time and a tic column with a list of datetimes and ticker symbols respectively. +An example of dataframe is shown below: +`````` + date high low close tic +0 2020-12-23 0.157414 0.127420 0.136394 ADA-USD +1 2020-12-23 34.381519 30.074295 31.097898 BNB-USD +2 2020-12-23 24024.490234 22802.646484 23241.345703 BTC-USD +3 2020-12-23 0.004735 0.003640 0.003768 DOGE-USD +4 2020-12-23 637.122803 560.364258 583.714600 ETH-USD +... ... ... ... ... ... +`````` + +## Actions + +At each time step, the environment expects an action that is a one-dimensional Box of shape (n+1,), where $n$ is the number of stocks in the portfolio. This action is called *portfolio vector* and contains, for the remaining cash and for each stock, the percentage of allocated money. + +For example: given a portfolio of three stocks, a valid portfolio vector would be $W_{t} = [0.25, 0.4, 0.2, 0.15]$. In this example, 25% of the money is not invested (remaining cash), 40% is invested in stock 1, 20% in stock 2 and 15% in stock 3. + +**Note:** It's important that the sum of the values in the portfolio vector is equal (or very close) to 1. If it's not, or if any value is negative, POE will apply a softmax normalization. + +## Observations + +POE can return two types of observations during simulation: a Dict or a Box. + +- The box is a three-dimensional array of shape $(f, n, t)$, where $f$ is the number of features, $n$ is the number of stocks in the portfolio and $t$ is the time series time window. This observation basically only contains the current state of the agent. 
+ +- The dict representation, on the other hand, is a dictionary containing the state and the last portfolio vector, like below: + +```json +{ +"state": "three-dimensional Box (f, n, t) representing the time series", +"last_action": "one-dimensional Box (n+1,) representing the portfolio weights" +} +``` + +## Rewards +Given the simulation of timestep $t$, the reward is given by the following formula: $r_{t} = \ln(V_{t}/V_{t-1})$, where $V_{t}$ is the value of the portfolio at time $t$. By using this formulation, the reward is negative whenever the portfolio value decreases during the time step, positive whenever it increases and zero when it is unchanged. + +## Example +A jupyter notebook using this environment can be found [here](/examples/FinRL_PortfolioOptimizationEnv_Demo.ipynb). diff --git a/finrl/meta/env_portfolio_optimization/__init__.py b/finrl/meta/env_portfolio_optimization/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/finrl/meta/env_portfolio_optimization/env_portfolio_optimization.py b/finrl/meta/env_portfolio_optimization/env_portfolio_optimization.py new file mode 100644 index 000000000..533d56ce5 --- /dev/null +++ b/finrl/meta/env_portfolio_optimization/env_portfolio_optimization.py @@ -0,0 +1,644 @@ +"""From FinRL https://github.com/AI4Finance-LLC/FinRL/tree/master/finrl/env""" +from __future__ import annotations + +import math + +import gym +import matplotlib +import numpy as np +import pandas as pd +from gym import spaces +from gym.utils import seeding + +matplotlib.use("Agg") +import matplotlib.pyplot as plt +from stable_baselines3.common.vec_env import DummyVecEnv +from pathlib import Path + +try: + import quantstats as qs +except ModuleNotFoundError: + raise ModuleNotFoundError( + """QuantStats module not found, environment can't plot results and calculate indicadors. + This module is not installed with FinRL. 
class PortfolioOptimizationEnv(gym.Env):
    """A portfolio allocation environment for OpenAI gym.

    This environment simulates the interactions between an agent and the financial market
    based on data provided by a dataframe. The dataframe contains the time series of
    features defined by the user (such as closing, high and low prices) and must have
    a time and a tic column with a list of datetimes and ticker symbols respectively.
    An example of dataframe is shown below::

            date        high            low             close           tic
        0   2020-12-23  0.157414        0.127420        0.136394        ADA-USD
        1   2020-12-23  34.381519       30.074295       31.097898       BNB-USD
        2   2020-12-23  24024.490234    22802.646484    23241.345703    BTC-USD
        3   2020-12-23  0.004735        0.003640        0.003768        DOGE-USD
        4   2020-12-23  637.122803      560.364258      583.714600      ETH-USD
        ... ...         ...             ...             ...             ...

    Based on this dataframe, the environment will create an observation space that can
    be a Dict or a Box. The Box observation space is a three-dimensional array of shape
    (f, n, t), where f is the number of features, n is the number of stocks in the
    portfolio and t is the user-defined time window. If the environment is created with
    the parameter return_last_action set to True, the observation space is a Dict with
    the following keys::

        {
        "state": three-dimensional Box (f, n, t) representing the time series,
        "last_action": one-dimensional Box (n+1,) representing the portfolio weights
        }

    Note that the action space of this environment is an one-dimensional Box with size
    n + 1 because the portfolio weights must contain the weights related to all the
    stocks in the portfolio and to the remaining cash.

    Attributes:
        action_space: Action space.
        observation_space: Observation space.
        episode_length: Number of timesteps of an episode.
        portfolio_size: Number of stocks in the portfolio.
    """

    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        df,
        initial_amount,
        order_df=True,
        return_last_action=False,
        normalize_df="by_previous_time",
        reward_scaling=1,
        comission_fee_model="trf",
        comission_fee_pct=0,
        features=None,
        valuation_feature="close",
        time_column="date",
        time_format="%Y-%m-%d",
        tic_column="tic",
        time_window=1,
        cwd="./",
        new_gym_api=False,
    ):
        """Initializes environment's instance.

        Args:
            df: Dataframe with market information over a period of time.
            initial_amount: Initial amount of cash available to be invested.
            order_df: If True input dataframe is ordered by time.
            return_last_action: If True, observations also return the last performed
                action. Note that, in that case, the observation space is a Dict.
            normalize_df: Defines the normalization method applied to input dataframe.
                Possible values are "by_previous_time", "by_fist_time_window_value",
                "by_COLUMN_NAME" (where COLUMN_NAME must be changed to a real column
                name) and a custom function. If None no normalization is done.
            reward_scaling: A scaling factor to multiply the reward function. This
                factor can help training.
            comission_fee_model: Model used to simulate comission fee. Possible values
                are "trf" (for transaction remainder factor model) and "wvm" (for weights
                vector modifier model). If None, commission fees are not considered.
            comission_fee_pct: Percentage to be used in comission fee. It must be a value
                between 0 and 1.
            features: List of features to be considered in the observation space. The
                items of the list must be names of columns of the input dataframe.
                If None, ["close", "high", "low"] is used.
            valuation_feature: Feature to be considered in the portfolio value calculation.
            time_column: Name of the dataframe's column that contain the datetimes that
                index the dataframe.
            time_format: Formatting string of time column.
            tic_column: Name of the dataframe's column that contain ticker symbols.
            time_window: Size of time window.
            cwd: Local repository in which resulting graphs will be saved.
            new_gym_api: If True, the environment will use the new gym api standard for
                step and reset methods.
        """
        self._time_window = time_window
        self._time_index = time_window - 1
        self._time_column = time_column
        self._time_format = time_format
        self._tic_column = tic_column
        self._df = df
        self._initial_amount = initial_amount
        self._return_last_action = return_last_action
        self._reward_scaling = reward_scaling
        self._comission_fee_pct = comission_fee_pct
        self._comission_fee_model = comission_fee_model
        # A private list per instance: avoids a mutable default argument that
        # would be shared by every environment.
        self._features = (
            ["close", "high", "low"] if features is None else list(features)
        )
        self._valuation_feature = valuation_feature
        self._cwd = Path(cwd)
        self._new_gym_api = new_gym_api

        # results file
        self._results_file = self._cwd / "results" / "rl"
        self._results_file.mkdir(parents=True, exist_ok=True)

        # price variation
        self._df_price_variation = None

        # preprocess data
        self._preprocess_data(order_df, normalize_df)

        # dims and spaces
        self._tic_list = self._df[self._tic_column].unique()
        self.portfolio_size = len(self._tic_list)
        action_space = 1 + self.portfolio_size

        # sort datetimes and define episode length
        self._sorted_times = sorted(set(self._df[time_column]))
        self.episode_length = len(self._sorted_times) - time_window + 1

        # define action space
        self.action_space = spaces.Box(low=0, high=1, shape=(action_space,))

        # define observation state
        state_shape = (len(self._features), self.portfolio_size, self._time_window)
        if self._return_last_action:
            # if last action must be returned, a dict observation
            # is defined
            self.observation_space = spaces.Dict(
                {
                    "state": spaces.Box(low=-np.inf, high=np.inf, shape=state_shape),
                    "last_action": spaces.Box(low=0, high=1, shape=(action_space,)),
                }
            )
        else:
            # if information about last action is not relevant,
            # a 3D observation space is defined
            self.observation_space = spaces.Box(
                low=-np.inf, high=np.inf, shape=state_shape
            )

        self._reset_memory()

        self._portfolio_value = self._initial_amount
        self._terminal = False

    def step(self, actions):
        """Performs a simulation step.

        Args:
            actions: An unidimensional array containing the new portfolio
                weights.

        Note:
            If the environment was created with "return_last_action" set to
            True, the next state returned will be a Dict. If it's set to False,
            the next state will be a Box. You can check the observation state
            through the attribute "observation_space".

        Returns:
            If "new_gym_api" is set to True, the following tuple is returned:
            (state, reward, terminal, truncated, info). If it's set to False,
            the following tuple is returned: (state, reward, terminal, info).

            state: Next simulation state.
            reward: Reward related to the last performed action.
            terminal: If True, the environment is in a terminal state.
            truncated: If True, the environment has passed its simulation
                time limit. Currently, it's always False.
            info: A dictionary containing informations about the last state.
        """
        self._terminal = self._time_index >= len(self._sorted_times) - 1

        if self._terminal:
            # No simulation happens on the terminal call: results are saved
            # and printed, and the last state/reward are returned again.
            self._report_episode_results()
            if self._new_gym_api:
                return self._state, self._reward, self._terminal, False, self._info
            return self._state, self._reward, self._terminal, self._info

        # transform action to numpy array (if it's a list)
        actions = np.array(actions, dtype=np.float32)

        # if necessary, normalize weights
        if math.isclose(np.sum(actions), 1, abs_tol=1e-6) and np.min(actions) >= 0:
            weights = actions
        else:
            weights = self._softmax_normalization(actions)

        # save initial portfolio weights for this time step
        self._actions_memory.append(weights)

        # get last step final weights and portfolio_value
        last_weights = self._final_weights[-1]

        # load next state
        self._time_index += 1
        self._state, self._info = self._get_state_and_info_from_time_index(
            self._time_index
        )

        # if using weights vector modifier, we need to modify weights vector
        if self._comission_fee_model == "wvm":
            delta_weights = weights - last_weights
            delta_assets = delta_weights[1:]  # disregard the cash position
            # The fee is the commission percentage applied to the traded
            # value; without the percentage the whole traded amount would be
            # charged, even when the fee is configured as 0.
            fees = self._comission_fee_pct * np.sum(
                np.abs(delta_assets * self._portfolio_value)
            )
            if fees > weights[0] * self._portfolio_value:
                # not enough cash to pay the fees: keep the previous weights
                weights = last_weights
                # maybe add negative reward
            else:
                portfolio = weights * self._portfolio_value
                portfolio[0] -= fees
                self._portfolio_value = np.sum(portfolio)  # new portfolio value
                weights = portfolio / self._portfolio_value  # new weights
        elif self._comission_fee_model == "trf":
            # fixed-point iteration on the transaction remainder factor mu
            # NOTE(review): Jiang et al. (2017) appear to use the pre-rebalance
            # cash weight (last_weights[0]) in the numerator - confirm against
            # the paper before changing.
            last_mu = 1
            mu = 1 - 2 * self._comission_fee_pct + self._comission_fee_pct**2
            while abs(mu - last_mu) > 1e-10:
                last_mu = mu
                mu = (
                    1
                    - self._comission_fee_pct * weights[0]
                    - (2 * self._comission_fee_pct - self._comission_fee_pct**2)
                    * np.sum(np.maximum(last_weights[1:] - mu * weights[1:], 0))
                ) / (1 - self._comission_fee_pct * weights[0])
            self._info["trf_mu"] = mu
            self._portfolio_value = mu * self._portfolio_value

        # save initial portfolio value of this time step
        self._asset_memory["initial"].append(self._portfolio_value)

        # time passes and time variation changes the portfolio distribution
        portfolio = self._portfolio_value * (weights * self._price_variation)

        # calculate new portfolio value and weights
        self._portfolio_value = np.sum(portfolio)
        weights = portfolio / self._portfolio_value

        # save final portfolio value and weights of this time step
        self._asset_memory["final"].append(self._portfolio_value)
        self._final_weights.append(weights)

        # save date memory
        self._date_memory.append(self._info["end_time"])

        # define portfolio return
        rate_of_return = (
            self._asset_memory["final"][-1] / self._asset_memory["final"][-2]
        )
        portfolio_return = rate_of_return - 1
        portfolio_reward = np.log(rate_of_return)

        # save portfolio return memory
        self._portfolio_return_memory.append(portfolio_return)
        self._portfolio_reward_memory.append(portfolio_reward)

        # define the (scaled) reward of this step
        self._reward = portfolio_reward * self._reward_scaling

        if self._new_gym_api:
            return self._state, self._reward, self._terminal, False, self._info
        return self._state, self._reward, self._terminal, self._info

    def _report_episode_results(self):
        """Saves the episode's plots to the results folder and prints a
        summary of the portfolio performance.
        """
        metrics_df = pd.DataFrame(
            {
                "date": self._date_memory,
                "returns": self._portfolio_return_memory,
                "rewards": self._portfolio_reward_memory,
                "portfolio_values": self._asset_memory["final"],
            }
        )
        metrics_df.set_index("date", inplace=True)

        plt.plot(metrics_df["portfolio_values"], "r")
        plt.title("Portfolio Value Over Time")
        plt.xlabel("Time")
        plt.ylabel("Portfolio value")
        plt.savefig(self._results_file / "portfolio_value.png")
        plt.close()

        plt.plot(self._portfolio_reward_memory, "r")
        plt.title("Reward Over Time")
        plt.xlabel("Time")
        plt.ylabel("Reward")
        plt.savefig(self._results_file / "reward.png")
        plt.close()

        plt.plot(self._actions_memory)
        plt.title("Actions performed")
        plt.xlabel("Time")
        plt.ylabel("Weight")
        plt.savefig(self._results_file / "actions.png")
        plt.close()

        initial_value = self._asset_memory["final"][0]
        print("=================================")
        print(f"Initial portfolio value:{initial_value}")
        print(f"Final portfolio value: {self._portfolio_value}")
        print(
            f"Final accumulative portfolio value: "
            f"{self._portfolio_value / initial_value}"
        )
        print(
            f"Maximum DrawDown: "
            f"{qs.stats.max_drawdown(metrics_df['portfolio_values'])}"
        )
        print(f"Sharpe ratio: {qs.stats.sharpe(metrics_df['returns'])}")
        print("=================================")

        qs.plots.snapshot(
            metrics_df["returns"],
            show=False,
            savefig=self._results_file / "portfolio_summary.png",
        )

    def reset(self):
        """Resets the environment and returns it to its initial state (the
        first date of the dataframe).

        Note:
            If the environment was created with "return_last_action" set to
            True, the initial state will be a Dict. If it's set to False,
            the initial state will be a Box. You can check the observation
            state through the attribute "observation_space".

        Returns:
            If "new_gym_api" is set to True, the following tuple is returned:
            (state, info). If it's set to False, only the initial state is
            returned.

            state: Initial state.
            info: Initial state info.
        """
        # time_index must start a little bit in the future to implement lookback
        self._time_index = self._time_window - 1
        self._reset_memory()

        self._state, self._info = self._get_state_and_info_from_time_index(
            self._time_index
        )
        self._portfolio_value = self._initial_amount
        self._terminal = False

        if self._new_gym_api:
            return self._state, self._info
        return self._state

    def _get_state_and_info_from_time_index(self, time_index):
        """Gets state and information given a time index. It also updates "data"
        attribute with information about the current simulation step.

        Args:
            time_index: An integer that represents the index of a specific datetime.
                The initial datetime of the dataframe is given by 0.

        Note:
            If the environment was created with "return_last_action" set to
            True, the returned state will be a Dict. If it's set to False,
            the returned state will be a Box. You can check the observation
            state through the attribute "observation_space".

        Returns:
            A tuple with the following form: (state, info).

            state: The state of the current time index. It can be a Box or a Dict.
            info: A dictionary with some informations about the current simulation
                step. The dict has the following keys::

                {
                "tics": List of ticker symbols,
                "start_time": Start time of current time window,
                "start_time_index": Index of start time of current time window,
                "end_time": End time of current time window,
                "end_time_index": Index of end time of current time window,
                "data": Data related to the current time window,
                "price_variation": Price variation of current time step
                }
        """
        start_time_index = time_index - (self._time_window - 1)
        end_time = self._sorted_times[time_index]
        start_time = self._sorted_times[start_time_index]

        # define data to be used in this time step
        self._data = self._df[
            (self._df[self._time_column] >= start_time)
            & (self._df[self._time_column] <= end_time)
        ][[self._time_column, self._tic_column] + self._features]

        # define price variation of this time_step
        self._price_variation = self._df_price_variation[
            self._df_price_variation[self._time_column] == end_time
        ][self._valuation_feature].to_numpy()
        # the cash position (index 0) never varies
        self._price_variation = np.insert(self._price_variation, 0, 1)

        # define state to be returned in form (channels, tics, timesteps):
        # each ticker contributes a (features, timesteps) slice
        state = np.stack(
            [
                self._data[self._data[self._tic_column] == tic][self._features]
                .to_numpy()
                .T
                for tic in self._tic_list
            ],
            axis=1,
        )
        info = {
            "tics": self._tic_list,
            "start_time": start_time,
            "start_time_index": start_time_index,
            "end_time": end_time,
            "end_time_index": time_index,
            "data": self._data,
            "price_variation": self._price_variation,
        }
        return self._standardize_state(state), info

    def render(self, mode="human"):
        """Renders the environment.

        Returns:
            Observation of current simulation step.
        """
        return self._state

    def _softmax_normalization(self, actions):
        """Normalizes the action vector using softmax function.

        Args:
            actions: Action vector to be normalized.

        Returns:
            Normalized action vector (portfolio vector).
        """
        actions = np.asarray(actions)
        # Subtracting the maximum leaves the softmax unchanged but keeps the
        # exponentials from overflowing for large inputs.
        exponentials = np.exp(actions - np.max(actions))
        return exponentials / np.sum(exponentials)

    def enumerate_portfolio(self):
        """Enumerates the current porfolio by showing the ticker symbols
        of all the investments considered in the portfolio.
        """
        print("Index: 0. Tic: Cash")
        for index, tic in enumerate(self._tic_list):
            print(f"Index: {index + 1}. Tic: {tic}")

    def _preprocess_data(self, order, normalize):
        """Orders and normalizes the environment's dataframe.

        Args:
            order: If true, the dataframe will be ordered by ticker list
                and datetime.
            normalize: Defines the normalization method applied to the dataframe.
                Possible values are "by_previous_time", "by_fist_time_window_value",
                "by_COLUMN_NAME" (where COLUMN_NAME must be changed to a real column
                name) and a custom function. If None no normalization is done.
        """
        # order time dataframe by tic and time
        if order:
            self._df = self._df.sort_values(by=[self._tic_column, self._time_column])
        # defining price variation after ordering dataframe
        self._df_price_variation = self._temporal_variation_df()
        # apply normalization
        if normalize:
            self._normalize_dataframe(normalize)
        # transform str to datetime
        self._df[self._time_column] = pd.to_datetime(self._df[self._time_column])
        self._df_price_variation[self._time_column] = pd.to_datetime(
            self._df_price_variation[self._time_column]
        )
        # transform numeric variables to float32 (compatibility with pytorch)
        self._df[self._features] = self._df[self._features].astype("float32")
        self._df_price_variation[self._features] = self._df_price_variation[
            self._features
        ].astype("float32")

    def _reset_memory(self):
        """Resets the environment's memory."""
        date_time = self._sorted_times[self._time_index]
        # memorize portfolio value each step
        self._asset_memory = {
            "initial": [self._initial_amount],
            "final": [self._initial_amount],
        }
        # memorize portfolio return and reward each step
        self._portfolio_return_memory = [0]
        self._portfolio_reward_memory = [0]
        # initial action: all money is allocated in cash
        self._actions_memory = [
            np.array([1] + [0] * self.portfolio_size, dtype=np.float32)
        ]
        # memorize portfolio weights at the ending of time step
        self._final_weights = [
            np.array([1] + [0] * self.portfolio_size, dtype=np.float32)
        ]
        # memorize datetimes
        self._date_memory = [date_time]

    def _standardize_state(self, state):
        """Standardize the state given the observation space. If "return_last_action"
        is set to False, a three-dimensional box is returned. If it's set to True, a
        dictionary is returned. The dictionary follows the standard below::

            {
            "state": Three-dimensional box representing the current state,
            "last_action": One-dimensional box representing the last action
            }
        """
        if self._return_last_action:
            return {"state": state, "last_action": self._actions_memory[-1]}
        return state

    def _normalize_dataframe(self, normalize):
        """Normalizes the environment's dataframe.

        Args:
            normalize: Defines the normalization method applied to the dataframe.
                Possible values are "by_previous_time", "by_fist_time_window_value",
                "by_COLUMN_NAME" (where COLUMN_NAME must be changed to a real column
                name) and a custom function. If None no normalization is done.

        Note:
            If a custom function is used in the normalization, it must have an
            argument representing the environment's dataframe.
        """
        if isinstance(normalize, str):
            if normalize == "by_fist_time_window_value":
                print(f"Normalizing {self._features} by first time window value...")
                self._df = self._temporal_variation_df(self._time_window - 1)
            elif normalize == "by_previous_time":
                print(f"Normalizing {self._features} by previous time...")
                self._df = self._temporal_variation_df()
            elif normalize.startswith("by_"):
                normalizer_column = normalize[3:]
                print(f"Normalizing {self._features} by {normalizer_column}")
                for column in self._features:
                    self._df[column] = self._df[column] / self._df[normalizer_column]
            # any other string is silently ignored (no normalization is done)
        elif callable(normalize):
            print("Applying custom normalization function...")
            self._df = normalize(self._df)
        else:
            print("No normalization was performed.")

    def _temporal_variation_df(self, periods=1):
        """Calculates the temporal variation dataframe. For each feature, this
        dataframe contains the rate of the current feature's value and the last
        feature's value given a period. It's used to normalize the dataframe.

        Args:
            periods: Periods (in time indexes) to calculate temporal variation.

        Returns:
            Temporal variation dataframe.
        """
        df_temporal_variation = self._df.copy()
        prev_columns = []
        for column in self._features:
            prev_column = f"prev_{column}"
            prev_columns.append(prev_column)
            # previous value of the same ticker, ``periods`` rows before
            df_temporal_variation[prev_column] = df_temporal_variation.groupby(
                self._tic_column
            )[column].shift(periods=periods)
            df_temporal_variation[column] = (
                df_temporal_variation[column] / df_temporal_variation[prev_column]
            )
        # rows without a previous value get a neutral variation of 1
        df_temporal_variation = (
            df_temporal_variation.drop(columns=prev_columns)
            .fillna(1)
            .reset_index(drop=True)
        )
        return df_temporal_variation

    def _seed(self, seed=None):
        """Seeds the sources of randomness of this environment to guarantee
        reproducibility.

        Args:
            seed: Seed value to be applied.

        Returns:
            Seed value applied.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self, env_number=1):
        """Generates an environment compatible with Stable Baselines 3. The
        generated environment is a vectorized version of the current one.

        Args:
            env_number: Number of entries of the vectorized environment. Every
                entry is built from this same environment instance.

        Returns:
            A tuple with the generated environment and an initial observation.
        """
        e = DummyVecEnv([lambda: self] * env_number)
        obs = e.reset()
        return e, obs