added soilgrids; finished attributes

CH-Earth · Mar 9, 2024 · d4ad4d5 · d4ad4d5
1 parent a51fa54
commit d4ad4d5
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 200 deletions.
diff --git a/0_config/config.txt b/0_config/config.txt
@@ -100,6 +100,9 @@ geology_url          | https://borealisdata.ca/file.xhtml?fileId=72026&version=1
 climate_path         | worldclim                                                                                | string  # Folder for WorldClim climate data downloads
 climate_urls         | 0_config/worldclim_links.txt                                                             | string  # File with download links for WorldClim climate data, relative to 'code_path'
 
-# 9. Analysis
+# 9. Attributes
+att_path   | attributes | string  # Folder where attribute .csv files should go; appended to 'data_path'
+
+# 10. Analysis
 image_path | /Users/wmk934/OneDrive - University of Calgary/Postdoc/Documents/papers/paper-Knoben-CAMELspat/img | string  # Path where images will be stored
 
diff --git a/8_geospatial_data/3_calculate_attributes.ipynb b/8_geospatial_data/3_calculate_attributes.ipynb
@@ -45,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "id": "5aa42aa7-cf4d-4c51-8555-05638e2ffc3d",
    "metadata": {},
    "outputs": [],
@@ -65,7 +65,12 @@
     "# Get the temporary data folder\n",
     "cs_temp_folder = cs.read_from_config(config_file, 'temp_path')\n",
     "temp_path = Path(cs_temp_folder)\n",
-    "temp_path.mkdir(exist_ok=True, parents=True)"
+    "temp_path.mkdir(exist_ok=True, parents=True)\n",
+    "\n",
+    "# Get the attribute folder\n",
+    "att_folder = cs.read_from_config(config_file, 'att_path')\n",
+    "att_path = basins_path / att_folder\n",
+    "att_path.mkdir(parents=True, exist_ok=True)"
    ]
   },
   {
@@ -78,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
    "id": "333bd63a-b0a1-417d-b9c6-d28306a0120d",
    "metadata": {},
    "outputs": [],
@@ -90,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
    "id": "d5a497a0-a9a2-404c-9b2a-8173cbb1436b",
    "metadata": {},
    "outputs": [],
@@ -109,7 +114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
    "id": "3185dc90-2a46-46b2-bdd1-c0032c2c452e",
    "metadata": {},
    "outputs": [],
@@ -119,18 +124,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 10,
    "id": "f9c395a8-7e5d-4140-b61d-65581fc05a23",
    "metadata": {},
    "outputs": [],
    "source": [
-    "data_subfolders = ['era5', 'worldclim', 'hydrology', 'lai', 'forest_height', 'glclu2019', 'modis_land', 'lgrip30', 'merit', 'hydrolakes', 'pelletier', 'glhymps'] # \n",
-    "#, 'soilgrids'  "
+    "data_subfolders = ['era5', 'worldclim', 'hydrology', 'lai', 'forest_height', 'glclu2019', 'modis_land', 'lgrip30', 'merit', 'hydrolakes', 'pelletier', 'soilgrids', 'glhymps']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
    "id": "11e6b92c-35d3-4358-a3fe-0daec6cd96f4",
    "metadata": {},
    "outputs": [],
@@ -141,7 +145,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
    "id": "2b44331c-4cd6-4b2a-ba99-db3a767f9ddd",
    "metadata": {},
    "outputs": [
@@ -192,6 +196,7 @@
       " - processing merit\n",
       " - processing hydrolakes\n",
       " - processing pelletier\n",
+      " - processing soilgrids\n",
       " - processing glhymps\n",
       "\n",
       "!!! CHECK DEBUGGING STATUS: \n",
@@ -272,17 +277,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "id": "9d197b84-b519-487b-aaf4-b14929671829",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(840, 840)"
+       "(1128, 1128)"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -301,7 +306,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 14,
    "id": "9f038ae6-5dff-43cf-a462-8d31eea3d39a",
    "metadata": {},
    "outputs": [
@@ -411,7 +416,7 @@
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>840 rows × 1 columns</p>\n",
+       "<p>1128 rows × 1 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
@@ -429,10 +434,10 @@
        "         log_permeability_max  m^2   GLHYMPS               -12.5\n",
        "         log_permeability_std  m^2   GLHYMPS            1.234355\n",
        "\n",
-       "[840 rows x 1 columns]"
+       "[1128 rows x 1 columns]"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -455,189 +460,23 @@
     "df"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "309ae298-c51c-4fff-b74f-201e02ba5567",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.to_csv('test_new_function5.csv')"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 17,
-   "id": "b27b6090-0a20-48aa-8c85-0a0c5e3ceff4",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th>CAN_01AD002</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>Category</th>\n",
-       "      <th>Attribute</th>\n",
-       "      <th>Unit</th>\n",
-       "      <th>Source</th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th rowspan=\"11\" valign=\"top\">Climate</th>\n",
-       "      <th>mper_mean_month_01</th>\n",
-       "      <th>mm</th>\n",
-       "      <th>ERA5</th>\n",
-       "      <td>5.603236</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mper_mean_month_02</th>\n",
-       "      <th>mm</th>\n",
-       "      <th>ERA5</th>\n",
-       "      <td>6.93889</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mper_mean_month_03</th>\n",
-       "      <th>mm</th>\n",
-       "      <th>ERA5</th>\n",
-       "      <td>11.45397</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mper_mean_month_04</th>\n",
-       "      <th>mm</th>\n",
-       "      <th>ERA5</th>\n",
-       "      <td>17.082485</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mper_mean_month_05</th>\n",
-       "      <th>mm</th>\n",
-       "      <th>ERA5</th>\n",
-       "      <td>29.253726</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <th>...</th>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>fracsnow2_std_month_10</th>\n",
-       "      <th>-</th>\n",
-       "      <th>WorldClim</th>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>fracsnow2_mean_month_11</th>\n",
-       "      <th>-</th>\n",
-       "      <th>WorldClim</th>\n",
-       "      <td>0.999119</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>fracsnow2_std_month_11</th>\n",
-       "      <th>-</th>\n",
-       "      <th>WorldClim</th>\n",
-       "      <td>0.029672</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>fracsnow2_mean_month_12</th>\n",
-       "      <th>-</th>\n",
-       "      <th>WorldClim</th>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>fracsnow2_std_month_12</th>\n",
-       "      <th>-</th>\n",
-       "      <th>WorldClim</th>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>600 rows × 1 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                CAN_01AD002\n",
-       "Category Attribute               Unit Source               \n",
-       "Climate  mper_mean_month_01      mm   ERA5         5.603236\n",
-       "         mper_mean_month_02      mm   ERA5          6.93889\n",
-       "         mper_mean_month_03      mm   ERA5         11.45397\n",
-       "         mper_mean_month_04      mm   ERA5        17.082485\n",
-       "         mper_mean_month_05      mm   ERA5        29.253726\n",
-       "...                                                     ...\n",
-       "         fracsnow2_std_month_10  -    WorldClim         0.0\n",
-       "         fracsnow2_mean_month_11 -    WorldClim    0.999119\n",
-       "         fracsnow2_std_month_11  -    WorldClim    0.029672\n",
-       "         fracsnow2_mean_month_12 -    WorldClim         1.0\n",
-       "         fracsnow2_std_month_12  -    WorldClim         0.0\n",
-       "\n",
-       "[600 rows x 1 columns]"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Example of selection code\n",
-    "tmp = df.loc[df.index.get_level_values('Category').str.contains('Climate')]# & \n",
-    "             #df.index.get_level_values('Attribute').str.contains('mean')].copy()\n",
-    "tmp"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6127feb5-ed86-424b-8367-d4ec38177968",
-   "metadata": {},
-   "source": [
-    "## DEV"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "9fe183cc-8879-4189-9e21-014a1a76204f",
+   "id": "309ae298-c51c-4fff-b74f-201e02ba5567",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import baseflow\n",
-    "import geopandas as gpd\n",
-    "import glob\n",
-    "import numpy as np\n",
-    "from rasterstats import zonal_stats\n",
-    "import rasterio\n",
-    "from scipy.stats import circmean, circstd, skew, kurtosis\n",
-    "from scipy.optimize import curve_fit\n",
-    "import xarray as xr"
+    "att_file = f'attributes_{basin_id}.csv'\n",
+    "df.to_csv(att_path/att_file)"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "5ecd30bf-7f36-4a08-89bb-cc86515a05ab",
-   "metadata": {},
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true
+   },
    "source": [
     "### High-level collection functions"
    ]
@@ -668,10 +507,10 @@
     "                zonal_out = zonal_stats(shp_str, tif, stats=stats)\n",
     "                scale,offset = csa.read_scale_and_offset(tif)\n",
     "                l_values = csa.update_values_list(l_values, stats, zonal_out, scale, offset)\n",
-    "                l_index += [('Soil', f'{sub_folder}_{depth}_{stat}_min',  f'{unit}', 'SOILGRIDS'),\n",
-    "                            ('Soil', f'{sub_folder}_{depth}_{stat}_mean', f'{unit}', 'SOILGRIDS'),\n",
-    "                            ('Soil', f'{sub_folder}_{depth}_{stat}_max',  f'{unit}', 'SOILGRIDS'),\n",
-    "                            ('Soil', f'{sub_folder}_{depth}_{stat}_std',  f'{unit}', 'SOILGRIDS')]\n",
+    "                l_index += [('Soil', f'{sub_folder}_{depth}_{field}_min',  f'{unit}', 'SOILGRIDS'),\n",
+    "                            ('Soil', f'{sub_folder}_{depth}_{field}_mean', f'{unit}', 'SOILGRIDS'),\n",
+    "                            ('Soil', f'{sub_folder}_{depth}_{field}_max',  f'{unit}', 'SOILGRIDS'),\n",
+    "                            ('Soil', f'{sub_folder}_{depth}_{field}_std',  f'{unit}', 'SOILGRIDS')]\n",
     "\n",
     "    return l_values, l_index"
    ]

diff --git a/python_cs_functions/attributes.py b/python_cs_functions/attributes.py
@@ -30,12 +30,12 @@ def attributes_from_soilgrids(geo_folder, dataset, shp_str, l_values, l_index):
             for field in fields:
                 tif = str(geo_folder / dataset / 'raw' / f'{sub_folder}' / f'{sub_folder}_{depth}_{field}.tif')
                 zonal_out = zonal_stats(shp_str, tif, stats=stats)
-                scale,offset = csa.read_scale_and_offset(tif)
-                l_values = csa.update_values_list(l_values, stats, zonal_out, scale, offset)
-                l_index += [('Soil', f'{sub_folder}_{depth}_{stat}_min',  f'{unit}', 'SOILGRIDS'),
-                            ('Soil', f'{sub_folder}_{depth}_{stat}_mean', f'{unit}', 'SOILGRIDS'),
-                            ('Soil', f'{sub_folder}_{depth}_{stat}_max',  f'{unit}', 'SOILGRIDS'),
-                            ('Soil', f'{sub_folder}_{depth}_{stat}_std',  f'{unit}', 'SOILGRIDS')]
+                scale,offset = read_scale_and_offset(tif)
+                l_values = update_values_list(l_values, stats, zonal_out, scale, offset)
+                l_index += [('Soil', f'{sub_folder}_{depth}_{field}_min',  f'{unit}', 'SOILGRIDS'),
+                            ('Soil', f'{sub_folder}_{depth}_{field}_mean', f'{unit}', 'SOILGRIDS'),
+                            ('Soil', f'{sub_folder}_{depth}_{field}_max',  f'{unit}', 'SOILGRIDS'),
+                            ('Soil', f'{sub_folder}_{depth}_{field}_std',  f'{unit}', 'SOILGRIDS')]
 
     return l_values, l_index