From 7ca263a9468a382660e27a1a848dda914fc63318 Mon Sep 17 00:00:00 2001 From: Naoki Mizukami Date: Fri, 11 Oct 2024 17:04:05 -0600 Subject: [PATCH] auto-formatting --- .../nblibrary/rof/month_annual_flow.ipynb | 731 ++++++++++-------- 1 file changed, 400 insertions(+), 331 deletions(-) diff --git a/examples/nblibrary/rof/month_annual_flow.ipynb b/examples/nblibrary/rof/month_annual_flow.ipynb index 1c5125f..1598bbe 100644 --- a/examples/nblibrary/rof/month_annual_flow.ipynb +++ b/examples/nblibrary/rof/month_annual_flow.ipynb @@ -4,9 +4,11 @@ "cell_type": "code", "execution_count": null, "metadata": { - "tags": [ - "parameters" - ] + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [], "source": [ @@ -128,7 +130,9 @@ "slideshow": { "slide_type": "" }, - "tags": [] + "tags": [ + "parameters" + ] }, "outputs": [], "source": [ @@ -178,6 +182,8 @@ "metadata": {}, "outputs": [], "source": [ + "start_date = \"0010-01\"\n", + "end_date = \"0015-12\"\n", "time_period = slice(f\"{start_date}\", f\"{end_date}\") # analysis time period\n", "nyrs = int(end_date[:4]) - int(start_date[:4]) + 1 # number of years\n", "nmons = nyrs * 12 # number of months" @@ -205,7 +211,7 @@ "source": [ "# Spin up cluster (if running in parallel)\n", "client = None\n", - "if not serial:\n", + "if serial:\n", " cluster = LocalCluster(**lc_kwargs)\n", " client = Client(cluster)\n", "\n", @@ -242,13 +248,13 @@ "year_data = {}\n", "seas_data = {}\n", "for case, grid in zip([case_name], [grid_name]):\n", - " in_dire = os.path.join(CESM_output_dir, case_name, \"rof/hist\")\n", + " in_dire = os.path.join(CESM_output_dir, case, \"rof/hist\")\n", " model = case_meta[grid][\"model\"]\n", " domain = case_meta[grid][\"domain_nc\"]\n", " # monthly\n", " month_data[case] = (\n", " xr.open_mfdataset(\n", - " f\"{in_dire}/{case}.{model}.*.nc\",\n", + " f\"{in_dire}/{case}.{model}.h*.001?-*.nc\",\n", " data_vars=\"minimal\",\n", " chunks={\"time\": 12},\n", " )\n", @@ -256,7 +262,7 @@ " .load()\n", " )\n", " # annual\n", - " year_data[case] = month_data[case].resample(time=\"AS\").mean(dim=\"time\")\n", + " year_data[case] = month_data[case].resample(time=\"YS\").mean(dim=\"time\")\n", "\n", " # seasonal (compute here instead of reading for conisistent analysis period)\n", " seas_data[case] = month_data[case].groupby(\"time.month\").mean(\"time\")\n", @@ -264,7 +270,7 @@ " seas_data[case][vars_no_time] = seas_data[case][vars_no_time].isel(\n", " month=0, drop=True\n", " )\n", - "\n", + " time = month_data[case][\"time\"]\n", " if domain == \"None\":\n", " reachID[case] = month_data[case][\"reachID\"].values\n", " else:\n", @@ -365,13 +371,23 @@ ")\n", "\n", "# monthly\n", - "ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n", + "obs_available = True\n", + "if ds_q[\"time\"].sel(time=time_period).values.size == 0:\n", + " obs_available = False\n", + " ds_q_obs_mon = xr.DataArray(\n", + " data=np.ones((len(time), len(ds_q[\"station\"])), dtype=\"float\") * np.nan,\n", + " dims=[\"time\", \"station\"],\n", + " coords=dict(\n", + " station=ds_q[\"station\"],\n", + " time=time,\n", + " ),\n", + " )\n", + "else:\n", + " ds_q_obs_mon = ds_q[\"FLOW\"].sel(time=time_period)\n", "# compute annual flow from monthly\n", "ds_q_obs_yr = ds_q_obs_mon.resample(time=\"YE\").mean(dim=\"time\")\n", "# compute annual cycle at monthly scale\n", - "dr_q_obs_seasonal = (\n", - " ds_q_obs_mon.sel(time=time_period).groupby(\"time.month\").mean(\"time\")\n", - ")" + "dr_q_obs_seasonal = ds_q_obs_mon.groupby(\"time.month\").mean(\"time\")" ] }, { @@ -385,7 +401,13 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ "gauge_plot = {}\n", @@ -459,15 +481,15 @@ " year_data[case][q_name][:, net_idx[0], net_idx[1]].plot(\n", " ax=axes[row, col], linestyle=\"-\", c=color, lw=0.75, label=case\n", " )\n", - "\n", - " ds_q_obs_yr.loc[:, gaug_idx].plot(\n", - " ax=axes[row, col],\n", - " linestyle=\"None\",\n", - " marker=\"o\",\n", - " markersize=3,\n", - " c=\"k\",\n", - " label=\"D17\",\n", - " )\n", + " if obs_available:\n", + " ds_q_obs_yr.loc[:, gaug_idx].plot(\n", + " ax=axes[row, col],\n", + " linestyle=\"None\",\n", + " marker=\"o\",\n", + " markersize=3,\n", + " c=\"k\",\n", + " label=\"D17\",\n", + " )\n", "\n", " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", "\n", @@ -532,15 +554,16 @@ " ax=axes[row, col], linestyle=\"-\", c=color, lw=1.0, label=case\n", " )\n", "\n", - " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n", - " ax=axes[row, col],\n", - " linestyle=\":\",\n", - " lw=0.5,\n", - " marker=\"o\",\n", - " markersize=2,\n", - " c=\"k\",\n", - " label=\"D17\",\n", - " )\n", + " if obs_available:\n", + " dr_q_obs_seasonal.loc[:, gaug_idx].plot(\n", + " ax=axes[row, col],\n", + " linestyle=\":\",\n", + " lw=0.5,\n", + " marker=\"o\",\n", + " markersize=2,\n", + " c=\"k\",\n", + " label=\"D17\",\n", + " )\n", "\n", " axes[row, col].set_title(\"%d %s\" % (ix + 1, river_name), fontsize=8)\n", " axes[row, col].set_xlabel(\"\")\n", @@ -578,81 +601,86 @@ "outputs": [], "source": [ "%%time\n", - "# Monthly flow scatter plot\n", "\n", - "fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", - "plt.subplots_adjust(\n", - " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", - ")\n", + "if obs_available:\n", + " # Monthly flow scatter plot\n", + " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", + " plt.subplots_adjust(\n", + " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", + " )\n", "\n", - "for ix, river_name in enumerate(big_river_24.keys()):\n", - " row = ix // 3\n", - " col = ix % 3\n", - " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", + " for ix, river_name in enumerate(big_river_24.keys()):\n", + " row = ix // 3\n", + " col = ix % 3\n", + " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", "\n", - " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", "\n", - " net_idx = gauge_plot[river_name][case][1]\n", - " gaug_idx = gauge_plot[river_name][case][0]\n", + " net_idx = gauge_plot[river_name][case][1]\n", + " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", - " q_name = case_meta[grid][\"flow_name\"]\n", - " color = case_meta[grid][\"color\"]\n", + " q_name = case_meta[grid][\"flow_name\"]\n", + " color = case_meta[grid][\"color\"]\n", "\n", - " if len(net_idx) == 1: # means vector\n", - " ds_sim = month_data[case][q_name][:, net_idx].sel(time=time_period)\n", - " elif len(net_idx) == 2: # means 2d grid\n", - " ds_sim = (\n", - " month_data[case][q_name][:, net_idx[0], net_idx[1]]\n", - " .sel(time=time_period)\n", - " .squeeze()\n", - " )\n", + " if len(net_idx) == 1: # means vector\n", + " ds_sim = month_data[case][q_name][:, net_idx].sel(time=time_period)\n", + " elif len(net_idx) == 2: # means 2d grid\n", + " ds_sim = (\n", + " month_data[case][q_name][:, net_idx[0], net_idx[1]]\n", + " .sel(time=time_period)\n", + " .squeeze()\n", + " )\n", "\n", - " ds_obs = ds_q_obs_mon.sel(time=time_period).loc[:, gaug_idx]\n", + " ds_obs = ds_q_obs_mon.sel(time=time_period).loc[:, gaug_idx]\n", + "\n", + " axes[row, col].plot(\n", + " ds_obs, ds_sim, \"o\", c=color, label=case, markersize=4.0, alpha=0.4\n", + " )\n", + " if jx == 0:\n", + " max_val = np.max(ds_obs)\n", + " min_val = np.min(ds_obs)\n", + " else:\n", + " max_val = np.max([np.max(ds_sim), max_val])\n", + " min_val = np.min([np.min(ds_sim), min_val])\n", "\n", " axes[row, col].plot(\n", - " ds_obs, ds_sim, \"o\", c=color, label=case, markersize=4.0, alpha=0.4\n", + " [min_val * 0.98, max_val * 1.02],\n", + " [min_val * 0.98, max_val * 1.02],\n", + " \":k\",\n", + " linewidth=0.5,\n", " )\n", - " if jx == 0:\n", - " max_val = np.max(ds_obs)\n", - " min_val = np.min(ds_obs)\n", + "\n", + " axes[row, col].annotate(\n", + " \"%d %s\" % (ix + 1, river_name),\n", + " xy=(0.05, 0.875),\n", + " xycoords=\"axes fraction\",\n", + " fontsize=8,\n", + " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", + " )\n", + " if row == 7 and col == 1:\n", + " axes[row, col].set_xlabel(\n", + " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n", + " )\n", " else:\n", - " max_val = np.max([np.max(ds_sim), max_val])\n", - " min_val = np.min([np.min(ds_sim), min_val])\n", - "\n", - " axes[row, col].plot(\n", - " [min_val * 0.98, max_val * 1.02],\n", - " [min_val * 0.98, max_val * 1.02],\n", - " \":k\",\n", - " linewidth=0.5,\n", - " )\n", + " axes[row, col].set_xlabel(\"\")\n", "\n", - " axes[row, col].annotate(\n", - " \"%d %s\" % (ix + 1, river_name),\n", - " xy=(0.05, 0.875),\n", - " xycoords=\"axes fraction\",\n", - " fontsize=8,\n", - " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", + " if col == 0 and row == 5:\n", + " axes[row, col].set_ylabel(\n", + " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n", + " )\n", + " else:\n", + " axes[row, col].set_ylabel(\"\")\n", + " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", + "\n", + " axes.flatten()[-2].legend(\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", + " ncol=5,\n", + " fontsize=\"x-small\",\n", " )\n", - " if row == 7 and col == 1:\n", - " axes[row, col].set_xlabel(\"Monthly reference discharge [m$^3$/s]\", fontsize=9)\n", - " else:\n", - " axes[row, col].set_xlabel(\"\")\n", "\n", - " if col == 0 and row == 5:\n", - " axes[row, col].set_ylabel(\"Monthly simulated discharge [m$^3$/s]\", fontsize=10)\n", - " else:\n", - " axes[row, col].set_ylabel(\"\")\n", - " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", - "\n", - "axes.flatten()[-2].legend(\n", - " loc=\"upper center\",\n", - " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", - " ncol=5,\n", - " fontsize=\"x-small\",\n", - ")\n", - "\n", - "if figureSave:\n", - " plt.savefig(f\"./NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200)" + " if figureSave:\n", + " plt.savefig(f\"./NB1_Fig3_big_river_month_scatter_{analysis_name}.png\", dpi=200)" ] }, { @@ -669,79 +697,83 @@ "outputs": [], "source": [ "%%time\n", - "# annual flow scatter plot\n", + "if obs_available:\n", + " # annual flow scatter plot\n", + " fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", + " plt.subplots_adjust(\n", + " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", + " )\n", "\n", - "fig, axes = plt.subplots(8, 3, figsize=(7.50, 15.00))\n", - "plt.subplots_adjust(\n", - " top=0.995, bottom=0.075, right=0.995, left=0.1, wspace=0.25, hspace=0.25\n", - ")\n", + " for ix, river_name in enumerate(big_river_24.keys()):\n", + " row = ix // 3\n", + " col = ix % 3\n", + " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", "\n", - "for ix, river_name in enumerate(big_river_24.keys()):\n", - " row = ix // 3\n", - " col = ix % 3\n", - " axes[row, col].yaxis.set_major_formatter(FormatStrFormatter(\"%.0f\"))\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", "\n", - " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", + " net_idx = gauge_plot[river_name][case][1]\n", + " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", - " net_idx = gauge_plot[river_name][case][1]\n", - " gaug_idx = gauge_plot[river_name][case][0]\n", + " q_name = case_meta[grid][\"flow_name\"]\n", + " color = case_meta[grid][\"color\"]\n", "\n", - " q_name = case_meta[grid][\"flow_name\"]\n", - " color = case_meta[grid][\"color\"]\n", + " if len(net_idx) == 1: # means vector\n", + " ds_sim = year_data[case][q_name][:, net_idx].sel(time=time_period)\n", + " elif len(net_idx) == 2: # means 2d grid\n", + " ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]].sel(\n", + " time=time_period\n", + " )\n", "\n", - " if len(net_idx) == 1: # means vector\n", - " ds_sim = year_data[case][q_name][:, net_idx].sel(time=time_period)\n", - " elif len(net_idx) == 2: # means 2d grid\n", - " ds_sim = year_data[case][q_name][:, net_idx[0], net_idx[1]].sel(\n", - " time=time_period\n", - " )\n", + " ds_obs = ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx]\n", "\n", - " ds_obs = ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx]\n", + " axes[row, col].plot(\n", + " ds_obs, ds_sim, \"o\", c=color, label=case, markersize=4.0, alpha=0.4\n", + " )\n", + " if jx == 0:\n", + " max_val = np.max(ds_obs)\n", + " min_val = np.min(ds_obs)\n", + " else:\n", + " max_val = np.max([np.max(ds_sim), max_val])\n", + " min_val = np.min([np.min(ds_sim), min_val])\n", "\n", " axes[row, col].plot(\n", - " ds_obs, ds_sim, \"o\", c=color, label=case, markersize=4.0, alpha=0.4\n", + " [min_val * 0.98, max_val * 1.02],\n", + " [min_val * 0.98, max_val * 1.02],\n", + " \":k\",\n", + " linewidth=0.5,\n", + " )\n", + "\n", + " axes[row, col].annotate(\n", + " \"%d %s\" % (ix + 1, river_name),\n", + " xy=(0.05, 0.875),\n", + " xycoords=\"axes fraction\",\n", + " fontsize=8,\n", + " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", " )\n", - " if jx == 0:\n", - " max_val = np.max(ds_obs)\n", - " min_val = np.min(ds_obs)\n", + " if row == 7 and col == 1:\n", + " axes[row, col].set_xlabel(\n", + " \"Monthly reference discharge [m$^3$/s]\", fontsize=9\n", + " )\n", " else:\n", - " max_val = np.max([np.max(ds_sim), max_val])\n", - " min_val = np.min([np.min(ds_sim), min_val])\n", - "\n", - " axes[row, col].plot(\n", - " [min_val * 0.98, max_val * 1.02],\n", - " [min_val * 0.98, max_val * 1.02],\n", - " \":k\",\n", - " linewidth=0.5,\n", - " )\n", + " axes[row, col].set_xlabel(\"\")\n", "\n", - " axes[row, col].annotate(\n", - " \"%d %s\" % (ix + 1, river_name),\n", - " xy=(0.05, 0.875),\n", - " xycoords=\"axes fraction\",\n", - " fontsize=8,\n", - " bbox=dict(facecolor=\"white\", edgecolor=\"None\", alpha=0.8),\n", + " if col == 0 and row == 5:\n", + " axes[row, col].set_ylabel(\n", + " \"Monthly simulated discharge [m$^3$/s]\", fontsize=10\n", + " )\n", + " else:\n", + " axes[row, col].set_ylabel(\"\")\n", + " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", + "\n", + " axes.flatten()[-2].legend(\n", + " loc=\"upper center\",\n", + " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", + " ncol=5,\n", + " fontsize=\"x-small\",\n", " )\n", - " if row == 7 and col == 1:\n", - " axes[row, col].set_xlabel(\"Monthly reference discharge [m$^3$/s]\", fontsize=9)\n", - " else:\n", - " axes[row, col].set_xlabel(\"\")\n", "\n", - " if col == 0 and row == 5:\n", - " axes[row, col].set_ylabel(\"Monthly simulated discharge [m$^3$/s]\", fontsize=10)\n", - " else:\n", - " axes[row, col].set_ylabel(\"\")\n", - " axes[row, col].tick_params(\"both\", labelsize=\"xx-small\")\n", - "\n", - "axes.flatten()[-2].legend(\n", - " loc=\"upper center\",\n", - " bbox_to_anchor=(0.10, -0.40, 0.75, 0.1),\n", - " ncol=5,\n", - " fontsize=\"x-small\",\n", - ")\n", - "\n", - "if figureSave:\n", - " plt.savefig(f\"./NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200)" + " if figureSave:\n", + " plt.savefig(f\"./NB1_Fig4_big_river_annual_scatter_{analysis_name}.png\", dpi=200)" ] }, { @@ -775,14 +807,12 @@ " # headers\n", " f.write(\"No, river_name,\")\n", " f.write(\"{0: >15}_vol,\".format(\"obs\"))\n", - " for jx, case in enumerate(cases.keys()):\n", - " grid_name = cases[case]\n", - " f.write(\"{0: >15}_vol,\".format(grid_name))\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", + " f.write(\"{0: >15}_vol,\".format(grid))\n", " f.write(\"{0: >15}_area\".format(\"obs\"))\n", - " for jx, case in enumerate(cases.keys()):\n", - " grid_name = cases[case]\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", " f.write(\",\")\n", - " f.write(\"{0: >15}_area\".format(grid_name))\n", + " f.write(\"{0: >15}_area\".format(grid))\n", " f.write(\"\\n\")\n", "\n", " # data for each river\n", @@ -824,22 +854,24 @@ " )\n", "\n", " if jx == 0:\n", - " qobs = (\n", - " np.nanmean(\n", - " ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx].values\n", + " if ~np.all(np.isnan(ds_q_obs_yr)):\n", + " qobs = (\n", + " np.nanmean(\n", + " ds_q_obs_yr.sel(time=time_period).loc[:, gaug_idx].values\n", + " )\n", + " * 60\n", + " * 60\n", + " * 24\n", + " * 365\n", + " / 10**9\n", " )\n", - " * 60\n", - " * 60\n", - " * 24\n", - " * 365\n", - " / 10**9\n", - " )\n", + " else:\n", + " qobs = np.nan\n", "\n", " f.write(\"{:19.1f},\".format(qobs))\n", " f.write(\"{:19.1f}\".format(qsim))\n", "\n", - " for jx, case in enumerate(cases.keys()):\n", - " grid_name = cases[case]\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", " f.write(\",\")\n", " gaug_idx = gauge_plot[river_name][case][0]\n", "\n", @@ -858,6 +890,13 @@ " f.write(\"\\n\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -871,57 +910,65 @@ "metadata": {}, "outputs": [], "source": [ - "df_yr_vol = pd.read_csv(\n", - " f\"./large50rivers_{analysis_name}.txt\",\n", - " skiprows=3,\n", - " index_col=[\"No\"],\n", - " skipinitialspace=True,\n", - ")\n", + "if obs_available:\n", "\n", - "fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n", - "regressor = LinearRegression()\n", - "bias_text = \"\"\n", + " df_yr_vol = pd.read_csv(\n", + " f\"./large50rivers_{analysis_name}.txt\",\n", + " skiprows=3,\n", + " index_col=[\"No\"],\n", + " skipinitialspace=True,\n", + " )\n", "\n", - "for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", + " fig, axes = plt.subplots(1, figsize=(5.50, 5.50))\n", + " regressor = LinearRegression()\n", + " bias_text = \"\"\n", "\n", - " # compute linear regression\n", - " df_reg = df_yr_vol[[\"obs_vol\", f\"{grid_name}_vol\"]].dropna()\n", - " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{grid}_vol\"])\n", - " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n", + " for jx, (case, grid) in enumerate(zip([case_name], [grid_name])):\n", "\n", - " # compute bias over 50 sites\n", - " diff = (df_yr_vol[f\"{grid}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(axis=0, skipna=True)\n", + " # compute linear regression\n", + " df_reg = df_yr_vol[[\"obs_vol\", f\"{grid}_vol\"]].dropna()\n", + " regressor.fit(df_reg[[\"obs_vol\"]], df_reg[f\"{grid}_vol\"])\n", + " y_pred = regressor.predict(df_reg[[\"obs_vol\"]])\n", "\n", - " # color assigned to grid name\n", - " color = case_meta[grid][\"color\"]\n", + " # compute bias over 50 sites\n", + " diff = (df_yr_vol[f\"{grid}_vol\"] - df_yr_vol[\"obs_vol\"]).mean(\n", + " axis=0, skipna=True\n", + " )\n", "\n", - " df_yr_vol.plot(\n", - " ax=axes,\n", - " kind=\"scatter\",\n", - " x=\"obs_vol\",\n", - " y=f\"{grid}_vol\",\n", - " s=30,\n", - " color=color,\n", - " alpha=0.6,\n", - " label=grid_name,\n", - " )\n", - " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n", - " bias_text = bias_text + f\"\\n{grid}: {diff:.1f} \"\n", + " # color assigned to grid name\n", + " color = case_meta[grid][\"color\"]\n", "\n", - "plt.text(0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\")\n", - "plt.text(0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\")\n", + " df_yr_vol.plot(\n", + " ax=axes,\n", + " kind=\"scatter\",\n", + " x=\"obs_vol\",\n", + " y=f\"{grid}_vol\",\n", + " s=30,\n", + " color=color,\n", + " alpha=0.6,\n", + " label=grid,\n", + " )\n", + " # plt.plot(df_reg['obs_vol'], y_pred, color=color)\n", + " bias_text = bias_text + f\"\\n{grid}: {diff:.1f} \"\n", "\n", - "plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n", - "axes.set_xscale(\"log\")\n", - "axes.set_yscale(\"log\")\n", - "axes.set_xlabel(\"reference flow\")\n", - "axes.set_ylabel(\"Simulated flow\")\n", - "axes.set_title(\"River Flow at stations [km^3/yr]\")\n", + " plt.text(\n", + " 0.65, 0.30, \"bias [km3/yr]\", transform=axes.transAxes, verticalalignment=\"top\"\n", + " )\n", + " plt.text(\n", + " 0.65, 0.30, f\"{bias_text} \", transform=axes.transAxes, verticalalignment=\"top\"\n", + " )\n", "\n", - "if figureSave:\n", - " plt.savefig(\n", - " f\"./NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\", dpi=200\n", - " )" + " plt.axline((0, 0), slope=1, linestyle=\"--\", color=\"black\")\n", + " axes.set_xscale(\"log\")\n", + " axes.set_yscale(\"log\")\n", + " axes.set_xlabel(\"reference flow\")\n", + " axes.set_ylabel(\"Simulated flow\")\n", + " axes.set_title(\"River Flow at stations [km^3/yr]\")\n", + "\n", + " if figureSave:\n", + " plt.savefig(\n", + " f\"./NB1_Fig5_50big_river_annual_flow_scatter_{analysis_name}.png\", dpi=200\n", + " )" ] }, { @@ -946,71 +993,71 @@ "outputs": [], "source": [ "%%time\n", + "if obs_available:\n", + " # Merge gauge_reach lnk (dataframe) into gauge shapefile\n", + " gauge_shp1 = {}\n", + " for case, df in gauge_reach_lnk.items():\n", + " # df = df.loc[(df['flag'] == 0)]\n", + " df1 = df.drop(columns=[\"riv_name\"])\n", + " gauge_shp1[case] = pd.merge(\n", + " gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n", + " )\n", "\n", - "# Merge gauge_reach lnk (dataframe) into gauge shapefile\n", - "gauge_shp1 = {}\n", - "for case, df in gauge_reach_lnk.items():\n", - " # df = df.loc[(df['flag'] == 0)]\n", - " df1 = df.drop(columns=[\"riv_name\"])\n", - " gauge_shp1[case] = pd.merge(\n", - " gauge_shp, df1, how=\"inner\", left_on=\"id\", right_on=\"gauge_id\"\n", - " )\n", - "\n", - "# compute %bias, correlation, and RMSE at each site based on monthly\n", - "mon_pbias = {}\n", - "mon_corr = {}\n", - "mon_rmse = {}\n", + " # compute %bias, correlation, and RMSE at each site based on monthly\n", + " mon_pbias = {}\n", + " mon_corr = {}\n", + " mon_rmse = {}\n", "\n", - "for case, grid in zip([case_name], [grid_name]):\n", + " for case, grid in zip([case_name], [grid_name]):\n", "\n", - " bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", - " corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", - " rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", + " bias = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", + " corr = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", + " rmse = np.full(len(gauge_shp1[case]), np.nan, dtype=float)\n", "\n", - " for ix, row in gauge_shp1[case].iterrows():\n", - " q_obs = np.full(\n", - " nmons, np.nan, dtype=float\n", - " ) # dummy q_sim that will be replaced by actual data if exist\n", - " q_sim = np.full(\n", - " nmons, np.nan, dtype=float\n", - " ) # dummy q_sim that will be replaced by actual data if exist\n", + " for ix, row in gauge_shp1[case].iterrows():\n", + " q_obs = np.full(\n", + " nmons, np.nan, dtype=float\n", + " ) # dummy q_sim that will be replaced by actual data if exist\n", + " q_sim = np.full(\n", + " nmons, np.nan, dtype=float\n", + " ) # dummy q_sim that will be replaced by actual data if exist\n", "\n", - " route_id = row[\"route_id\"]\n", - " gauge_id = row[\"gauge_id\"]\n", + " route_id = row[\"route_id\"]\n", + " gauge_id = row[\"gauge_id\"]\n", "\n", - " q_name = case_meta[grid][\"flow_name\"]\n", + " q_name = case_meta[grid][\"flow_name\"]\n", "\n", - " gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n", - " if len(gauge_ix) == 1:\n", - " gauge_ix = gauge_ix[0]\n", - " q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n", - "\n", - " route_ix = np.argwhere(reachID[case] == route_id)\n", - " if (\n", - " len(route_ix) == 1\n", - " ): # meaning there is flow site in network and simulation exist at this site\n", - " route_ix = route_ix[0]\n", - " if len(route_ix) == 1: # means vector\n", - " q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n", - " elif len(route_ix) == 2: # means 2d grid\n", - " q_sim = (\n", - " month_data[case][q_name][:, route_ix[0], route_ix[1]]\n", - " .squeeze()\n", - " .values\n", - " )\n", + " gauge_ix = np.argwhere(ds_q.id.values == gauge_id)\n", + " if len(gauge_ix) == 1:\n", + " gauge_ix = gauge_ix[0]\n", + " q_obs = ds_q_obs_mon[:, gauge_ix].squeeze().values\n", + "\n", + " route_ix = np.argwhere(reachID[case] == route_id)\n", + " if (\n", + " len(route_ix) == 1\n", + " ): # meaning there is flow site in network and simulation exist at this site\n", + " route_ix = route_ix[0]\n", + " if len(route_ix) == 1: # means vector\n", + " q_sim = month_data[case][q_name][:, route_ix].squeeze().values\n", + " elif len(route_ix) == 2: # means 2d grid\n", + " q_sim = (\n", + " month_data[case][q_name][:, route_ix[0], route_ix[1]]\n", + " .squeeze()\n", + " .values\n", + " )\n", "\n", - " # compute %bias, correlation, RMSE\n", - " bias[ix] = np.nanmean(q_sim - q_obs) / np.nanmean(q_obs) * 100.0\n", - " corr[ix] = np.corrcoef(q_sim, q_obs)[0, 1]\n", - " rmse[ix] = np.sqrt(np.mean((q_sim - q_obs) ** 2))\n", + " # compute %bias, correlation, RMSE\n", + " bias[ix] = np.nanmean(q_sim - q_obs) / np.nanmean(q_obs) * 100.0\n", + " corr[ix] = np.corrcoef(q_sim, q_obs)[0, 1]\n", + " rmse[ix] = np.sqrt(np.mean((q_sim - q_obs) ** 2))\n", "\n", - " mon_pbias[case] = bias\n", - " mon_corr[case] = corr\n", - " mon_rmse[case] = rmse\n", + " mon_pbias[case] = bias\n", + " mon_corr[case] = corr\n", + " mon_rmse[case] = rmse\n", "\n", - " gauge_shp1[case][f\"bias_{grid}\"] = bias\n", - " gauge_shp1[case][f\"corr_{grid}\"] = corr\n", - " gauge_shp1[case][f\"rmse_{grid}\"] = rmse" + " gauge_shp1[case][f\"bias_{grid}\"] = bias\n", + " gauge_shp1[case][f\"corr_{grid}\"] = corr\n", + " gauge_shp1[case][f\"rmse_{grid}\"] = rmse" ] }, { @@ -1027,47 +1074,64 @@ "outputs": [], "source": [ "%%time\n", + "if obs_available:\n", + " # some local plot setups\n", + " cbar_kwrgs = {\n", + " \"bias\": {\n", + " \"shrink\": 0.9,\n", + " \"pad\": 0.02,\n", + " \"orientation\": \"horizontal\",\n", + " \"extend\": \"both\",\n", + " },\n", + " \"corr\": {\n", + " \"shrink\": 0.9,\n", + " \"pad\": 0.02,\n", + " \"orientation\": \"horizontal\",\n", + " \"extend\": \"min\",\n", + " },\n", + " \"rmse\": {\n", + " \"shrink\": 0.9,\n", + " \"pad\": 0.02,\n", + " \"orientation\": \"horizontal\",\n", + " \"extend\": \"max\",\n", + " },\n", + " }\n", + "\n", + " meta = {\n", + " \"bias\": {\"name\": \"%bias\", \"vmin\": -100, \"vmax\": 100, \"cm\": colors.cmap11},\n", + " \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap12},\n", + " \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n", + " }\n", + "\n", + " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", + " for case, grid in zip([case_name], [grid_name]):\n", + " fig, ax = plt.subplots(\n", + " 1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n", + " )\n", "\n", - "# some local plot setups\n", - "cbar_kwrgs = {\n", - " \"bias\": {\"shrink\": 0.9, \"pad\": 0.02, \"orientation\": \"horizontal\", \"extend\": \"both\"},\n", - " \"corr\": {\"shrink\": 0.9, \"pad\": 0.02, \"orientation\": \"horizontal\", \"extend\": \"min\"},\n", - " \"rmse\": {\"shrink\": 0.9, \"pad\": 0.02, \"orientation\": \"horizontal\", \"extend\": \"max\"},\n", - "}\n", - "\n", - "meta = {\n", - " \"bias\": {\"name\": \"%bias\", \"vmin\": -100, \"vmax\": 100, \"cm\": colors.cmap11},\n", - " \"corr\": {\"name\": \"correlation\", \"vmin\": 0.2, \"vmax\": 1, \"cm\": colors.cmap12},\n", - " \"rmse\": {\"name\": \"RMSE\", \"vmin\": 0, \"vmax\": 1000, \"cm\": mpl.cm.turbo},\n", - "}\n", - "\n", - "for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", - " for case, grid in zip([case_name], [grid_name]):\n", - " fig, ax = plt.subplots(\n", - " 1, figsize=(7.5, 4.0), subplot_kw={\"projection\": ccrs.PlateCarree()}\n", - " )\n", - "\n", - " ax.add_feature(rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3)\n", - " ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n", - "\n", - " gauge_shp1[case].plot(\n", - " ax=ax,\n", - " column=f\"{error_metric}_{grid}\",\n", - " markersize=10,\n", - " cmap=meta[error_metric][\"cm\"],\n", - " vmin=meta[error_metric][\"vmin\"],\n", - " vmax=meta[error_metric][\"vmax\"],\n", - " )\n", + " ax.add_feature(\n", + " rivers_50m, facecolor=\"None\", edgecolor=\"b\", lw=0.5, alpha=0.3\n", + " )\n", + " ax.add_feature(land, facecolor=\"white\", edgecolor=\"grey\")\n", + "\n", + " gauge_shp1[case].plot(\n", + " ax=ax,\n", + " column=f\"{error_metric}_{grid}\",\n", + " markersize=10,\n", + " cmap=meta[error_metric][\"cm\"],\n", + " vmin=meta[error_metric][\"vmin\"],\n", + " vmax=meta[error_metric][\"vmax\"],\n", + " )\n", "\n", - " ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n", + " ax.set_title(\"%s %s\" % (case, meta[error_metric][\"name\"]))\n", "\n", - " points = ax.collections[-1]\n", - " plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n", + " points = ax.collections[-1]\n", + " plt.colorbar(points, ax=ax, **cbar_kwrgs[error_metric])\n", "\n", - " plt.tight_layout()\n", + " plt.tight_layout()\n", "\n", - " if figureSave:\n", - " plt.savefig(f\"./NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200)" + " if figureSave:\n", + " plt.savefig(f\"./NB1_Fig6_{error_metric}_{case}_map.png\", dpi=200)" ] }, { @@ -1089,39 +1153,44 @@ "metadata": {}, "outputs": [], "source": [ - "boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n", - "medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n", - "\n", - "for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", - " column_stat = []\n", - " gauge_shp_all_case = gauge_shp.copy(deep=True)\n", - " for case, grid in zip([case_name], [grid_name]):\n", - " gauge_shp_all_case = gauge_shp_all_case.merge(\n", - " gauge_shp1[case][[\"id\", f\"{error_metric}_{grid}\"]],\n", - " left_on=\"id\",\n", - " right_on=\"id\",\n", + "if obs_available:\n", + " boxprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"blue\"}\n", + " medianprops = {\"linestyle\": \"-\", \"linewidth\": 1.5, \"color\": \"red\"}\n", + "\n", + " for error_metric in [\"rmse\", \"bias\", \"corr\"]:\n", + " column_stat = []\n", + " gauge_shp_all_case = gauge_shp.copy(deep=True)\n", + " for case, grid in zip([case_name], [grid_name]):\n", + " gauge_shp_all_case = gauge_shp_all_case.merge(\n", + " gauge_shp1[case][[\"id\", f\"{error_metric}_{grid}\"]],\n", + " left_on=\"id\",\n", + " right_on=\"id\",\n", + " )\n", + " column_stat.append(f\"{error_metric}_{grid}\")\n", + " fig, ax = plt.subplots(1, figsize=(6.5, 4))\n", + " gauge_shp_all_case.boxplot(\n", + " ax=ax,\n", + " column=column_stat,\n", + " boxprops=boxprops,\n", + " medianprops=medianprops,\n", + " sym=\".\",\n", " )\n", - " column_stat.append(f\"{error_metric}_{grid}\")\n", - " fig, ax = plt.subplots(1, figsize=(6.5, 4))\n", - " gauge_shp_all_case.boxplot(\n", - " ax=ax, column=column_stat, boxprops=boxprops, medianprops=medianprops, sym=\".\"\n", - " )\n", "\n", - " xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n", - " ax.set_xticklabels(xticklabels)\n", + " xticklabels = [label[len(error_metric) + 1 :] for label in column_stat]\n", + " ax.set_xticklabels(xticklabels)\n", "\n", - " if error_metric == \"rmse\":\n", - " ax.set_ylim([0, 1000])\n", - " ax.set_title(\"RMSE [m3/s]\")\n", - " elif error_metric == \"bias\":\n", - " ax.set_ylim([-150, 250])\n", - " ax.set_title(\"%bias [%]\")\n", - " elif error_metric == \"corr\":\n", - " ax.set_ylim([-0.2, 1])\n", - " ax.set_title(\"correlation\")\n", + " if error_metric == \"rmse\":\n", + " ax.set_ylim([0, 1000])\n", + " ax.set_title(\"RMSE [m3/s]\")\n", + " elif error_metric == \"bias\":\n", + " ax.set_ylim([-150, 250])\n", + " ax.set_title(\"%bias [%]\")\n", + " elif error_metric == \"corr\":\n", + " ax.set_ylim([-0.2, 1])\n", + " ax.set_title(\"correlation\")\n", "\n", - " if figureSave:\n", - " plt.savefig(f\"./NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)" + " if figureSave:\n", + " plt.savefig(f\"./NB1_Fig7_{error_metric}_boxplot.png\", dpi=150)" ] }, {