From 92175849e2d20a6c5abe59b75346ef22acd1ba72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20M=C3=A9tivier?= <46794064+dmetivie@users.noreply.github.com> Date: Thu, 5 Sep 2024 10:48:29 +0200 Subject: [PATCH] adjust Geo figures --- docs/src/examples/tuto_paper.md | 272 ++++++++++------------- examples/tuto_paper.jl | 16 +- examples/utilities_geo_makie_features.jl | 10 +- 3 files changed, 131 insertions(+), 167 deletions(-) diff --git a/docs/src/examples/tuto_paper.md b/docs/src/examples/tuto_paper.md index abe29f1..d8dead4 100644 --- a/docs/src/examples/tuto_paper.md +++ b/docs/src/examples/tuto_paper.md @@ -9,13 +9,24 @@ cd(@__DIR__)#hide # Multisite daily Stochastic Weather Generator -This tutorial describes the Stochastic Weather Generator described in the *Interpretable Seasonal Hidden Markov Model for spatio-temporal stochastic rain generator in France* paper by Emmanuel Gobet, David Métivier and Sylvie Parey. -It provides a step-by-step construction of the Seasonal Hidden Markov Model (SHMM), the interpretation of the hidden states as Weather regimes over France and eventually the validation of the model with simulations. +This tutorial describes the numerical applications described in the paper [*Interpretable Seasonal Hidden Markov Model for spatio-temporal stochastic rain generation in France*](https://hal.inrae.fr/hal-04621349) by [Emmanuel Gobet](http://www.cmap.polytechnique.fr/~gobet/) (CMAP - École Polytechnique), [David Métivier](https://davidmetivier.mistea.inrae.fr/) (MISTEA -- INRAE) and [Sylvie Parey](https://fr.linkedin.com/in/sylvie-parey-60285194) (R&D -- EDF). +It shows a fully reproducible example on how to use the package `StochasticWeatherGenerators.jl` to reproduce, step-by-step, exactly (almost) all the figures of the paper. + +The paper describes the construction of a Stochastic Weather Generator with an Autoregressive Seasonal Hidden Markov Model (SHMM). The SHMM is trained with French weather stations, and the hidden states are interpreted as weather regimes. The model is validated with simulations, especially for its ability to reproduce extreme weather, e.g. droughts. +[Schematic of the Autoregressive Seasonal Hidden Markov Model](https://github.com/dmetivie/StochasticWeatherGenerators.jl/assets/46794064/5fe1d677-877d-4fd5-83ac-29d30f728ca5) +In the paper, the model is also used with Climate Change RCP scenarios (not shown here). ## Set up ### Package and functions +!!! note "For Julia new user" + There are several ways to `add` a package before `using`, one way is for this tutorial to copy-paste (it might take a while): + ```julia + import Pkg + Pkg.add(["CSV", "JLD", "DelimitedFiles", "DataFrames", "DataFramesMeta", "StatsBase", "Random", "Distributions", "StatsPlots", "LaTeXStrings"]) + ``` + ````@example tuto_paper using CSV, JLD, DelimitedFiles # File Read/Load/Save @@ -29,12 +40,21 @@ using Distributions ```` The two main packages for this tutorial are not yet registered in the official Julia registry, since they are not quite fully ready. -They can be either `add`ed through [my local Julia registry](https://github.com/dmetivie/LocalRegistry) with the [LocalRegistry.jl](https://github.com/GunnarFarneback/LocalRegistry.jl) package (or url). +They can be either `add`ed through [my local Julia registry](https://github.com/dmetivie/LocalRegistry) with the [LocalRegistry.jl](https://github.com/GunnarFarneback/LocalRegistry.jl) package i.e. +```julia +using LocalRegistry +using Pkg +pkg"registry add https://github.com/dmetivie/LocalRegistry" +Pkg.add("SmoothPeriodicStatsModels") +Pkg.add("StochasticWeatherGenerators") +``` + Or directly on the master branch with `add`ed via url i.e. + ```julia import Pkg -Pkg.add("https://github.com/dmetivie/SmoothPeriodicStatsModels.jl") -Pkg.add("https://github.com/dmetivie/StochasticWeatherGenerators.jl") +Pkg.add(url = "https://github.com/dmetivie/SmoothPeriodicStatsModels.jl") +Pkg.add(url = "https://github.com/dmetivie/StochasticWeatherGenerators.jl") ``` ````@example tuto_paper @@ -44,8 +64,6 @@ using StochasticWeatherGenerators # interface to use with SmoothPeriodicStatsMod ```` ````@example tuto_paper -save_tuto_path = "../../assets/tuto_1" - Random.seed!(1234) ```` @@ -66,13 +84,13 @@ file_for_plot_utilities = download("https://raw.githubusercontent.com/dmetivie/S include(file_for_plot_utilities) ```` -To plot maps, we use `GeoMakie.jl` + a hack with `NaturalEarth.jl`. This is still experimental. +To plot maps, we use `GeoMakie.jl` + `NaturalEarth.jl`. Note that using `cartopy` with `PyCall.jl` also works very well. For the following code to work you will need to add the following packages ```julia import Pkg -Pkg.add("HTTP", "JSON3", "GeoMakie", "CairoMakie") +Pkg.add("NaturalEarth", "GeoMakie", "CairoMakie") ``` ````@example tuto_paper @@ -91,7 +109,7 @@ T = 366 Define the French area for map (Longitude and latitude) plot and the precision of the map `precision_scale` ````@example tuto_paper -precision_scale = "50m" +precision_scale = 50 # meter LON_min = -5 # West @@ -167,7 +185,7 @@ selected_station_name = ["BOURGES", "TOULOUSE", "MARIGNANE", "LUXEMBOURG", "LILL !!! note "Hypothesis: Conditional Independence of Rain Occurrences" You can change the selected stations. However, keep in mind that for the model to work, the **conditional independence hypothesis** must hold between stations i.e. $\mathbb{P}(Y_1 = y_1, \cdots, Y_S = y_s\mid Z = k) = \prod_{s=1}^S \mathbb{P}(Y_s = y_s)$. - Hence stations must be sufficiently far apart. + Hence stations must be sufficiently far apart. Check out this [MNIST example](https://dmetivie.github.io/ExpectationMaximization.jl/dev/examples/#MNIST-dataset:-Bernoulli-Mixture) to see Bernoulli mixtures in action! ````@example tuto_paper station = @subset(station_all, :STANAME .∈ tuple(selected_station_name)) @@ -249,7 +267,7 @@ LON_idx = dms_to_dd.(station.LON) long_spell = [longuest_spell(y) for y in eachcol(Y)] -# map_with_stations(LON_idx, LAT_idx, long_spell; station_name=station_name, show_value=true, colorbar_show=true) +FR_map_spell = map_with_stations(LON_idx, LAT_idx, long_spell; station_name=station_name, show_value=true, colorbar_show=true, precision_scale = precision_scale) ```` ## Fit the seasonal HMM @@ -262,7 +280,7 @@ long_spell = [longuest_spell(y) for y in eachcol(Y)] ``` with ```math - P_c(t) = c_0 + \sum_{j=1}^\cDeg \left(c_{2j-1}\cos\left(\dfrac{2\pi}{T}j t\right) + c_{2j}\sin\left(\dfrac{2\pi}{T}j t\right). + P_c(t) = c_0 + \sum_{j=1}^{\texttt{Deg}} \left(c_{2j-1}\cos\left(\dfrac{2\pi}{T}j t\right) + c_{2j}\sin\left(\dfrac{2\pi}{T}j t\right)\right). ``` ### Fit slice: naive estimation @@ -298,11 +316,6 @@ With the Slice estimate as a good starting point for the full (seasonal) Baum We nothing #hide ```` -````@example tuto_paper -save(joinpath(save_tuto_path,"hmm_fit_K_$(K)_d_$(𝐃𝐞𝐠)_m_$(local_order).jld"), "hmm", hmm_fit, "hist", hist, "Q_param", θq_fit, "Y_param", θy_fit); -nothing #hide -```` - Uncomment to load previously computed hmm ```julia # hmm_infos = load("save_tuto_path/hmm_fit.jld") @@ -318,48 +331,38 @@ Uncomment to load previously computed hmm ````@example tuto_paper begin - pA = [plot(legendfont=14, foreground_color_legend=nothing, background_color_legend=nothing, yticksfontsize = 12) for k in 1:K] + pA = [plot(legendfont=14, foreground_color_legend=nothing, background_color_legend=nothing, legend_columns=4, tickfont=12, legendfontsize=16) for k in 1:K] for k in 1:K - [plot!(pA[k], hmm_fit.A[k, l, :], c=my_color(l, K), label=L"Q_{%$(k)\to %$(l)}", legend=:topleft) for l in 1:K] + [plot!(pA[k], hmm_fit.A[k, l, :], c=my_color(l, K), label=L"Q_{%$(k)\to %$(l)}", legend=:top, lw=1.75) for l in 1:K] hline!(pA[k], [0.5], c=:black, label=:none, s=:dot) - xticks!(pA[k], vcat(dayofyear_Leap.(Date.(2000, 1:12)), 366), vcat(string.(monthabbr.(1:12)), ""), xlims=(0, 367), xtickfontsize=10, ylims=(0, 1)) + xticks!(pA[k], vcat(dayofyear_Leap.(Date.(2000, 1:12)), 366), vcat(string.(monthabbr.(1:12)), ""), xlims=(0, 367), ylims=(0, 1)) end pallA = plot(pA..., size=(1000, 500)) end ```` -````@example tuto_paper -savefig(pallA, joinpath(save_tuto_path,"Q_transition_memo_1_K_4_d_2.pdf")); -nothing #hide -```` - #### Rain probabilities ````@example tuto_paper begin mm = 1 jt = D - pB = [plot(legendfont=14, title="$(station_name[j])", titlefontsize=16) for j in 1:jt] + pB = [plot(legendfont=14, title="$(station_name[j])", titlefontsize=17, tickfont=14, legendfontsize = 16) for j in 1:jt] for j in 1:jt - [plot!(pB[j], succprob.(hmm_fit.B[k, :, j, mm]), c=my_color(k, K), label=:none) for k in 1:K] + [plot!(pB[j], succprob.(hmm_fit.B[k, :, j, mm]), c=my_color(k, K), label=islabel(j, 3, L"\mathbb{P}(Y = \textrm{wet}\mid Z = %$k, H = \textrm{dry})"), lw=2) for k in 1:K] hline!(pB[j], [0.5], c=:black, label=:none, s=:dot) xticks!( pB[j], vcat(dayofyear_Leap.(Date.(2000, 1:12)), 366), - vcat(string.(monthabbr.(1:12)), ""), xtickfontsize=10 + vcat(string.(first.(monthabbr.(1:12)))) ) xlims!(pB[j], (0, 367)) ylims!(pB[j], (0, 1)) end - pallB = plot(pB[staid_lat]..., size=(3000 / 1.25, 1000 / 1.25), layout=(2, 5)) + pallB = plot(pB[staid_lat]..., size=(3000 / 2, 1000 / 1), layout=(2, 5)) end ```` -````@example tuto_paper -savefig(pallB, joinpath(save_tuto_path,"proba_rain_all_station.pdf")); -nothing #hide -```` - #### Spatial Rain probability ````@example tuto_paper @@ -369,6 +372,12 @@ memory_past_cat = 1 h = 1 (day before dry) or 2 (day before wet) $\mathbb{P}(Y = \text{Rain}\mid Z = k, H = h)$ with `h = memory_past_cat` +For now there are some scale rendering issues due to an [GeoMakie.jl issue](https://github.com/MakieOrg/GeoMakie.jl/issues/268) so it might be tiny. + +````@example tuto_paper +p_FR_map_mean_prob = map_with_stations(LON_idx, LAT_idx, [[mean(succprob.(hmm_fit.B[k, :, j, memory_past_cat])) for j in 1:length(STAID)] for k in 1:K], colorbar_show=true, colorbar_title = L"\mathbb{P}(Y = \text{Rain}\mid Z = k, H = 1)", precision_scale = precision_scale) +```` + ### Inference of the historical hidden states #### Viterbi algorithm @@ -381,8 +390,6 @@ data_stations_z = map(data_stations) do df end ẑ_per_cat = [findall(ẑ .== k) for k in 1:K] - -# CSV.write(joinpath(save_tuto_path,"z_hat_K_$(K)_d_$(𝐃𝐞𝐠)_m_$(local_order).csv"), DataFrame([:DATE, :z] .=> [data_stations[1].DATE[1+local_order:end], ẑ])); ```` #### Visualization of the historical sequences of hidden states @@ -410,15 +417,10 @@ begin xticks!(vcat(dayofyear_Leap.(Date.(2000, 1:12)), 366), vcat(string.(monthabbr.(1:12)), ""), xlims=(0, 367), xtickfontsize=14 / thick, ytickfontsize=14 / thick) hline!((1:year_nb) .+ 0.5, c=:black, legend=:none, lw=4) ylims!(0.5, year_nb + 0.5) - pviterbi = yticks!(1:year_nb, string.(year_range[select_year])) + pviterbi = yticks!(1:year_nb, string.(year_range[select_year]), size=(1000, 600)) end ```` -````@example tuto_paper -savefig(pviterbi, joinpath(save_tuto_path,"temporal_1959_2009.pdf")); -nothing #hide -```` - ## Adding Rain amounts to the model ### Marginal distribution @@ -431,9 +433,7 @@ g(r) = w \dfrac{e^{-{\frac {r}{\vartheta_1}}}}{\vartheta_1} + (1-w) \dfrac{e^{- whose parameters $w(t)$, $\vartheta_1(t)$ and $\vartheta_2(t)$ are smooth periodic functions of the day of the year. ````@example tuto_paper -@time "FitMLE RR" mix_allE = fit_mle_RR.(data_stations_z, K, local_order, mix₀=StochasticWeatherGenerators.mix_ini(T)) - -save(joinpath(save_tuto_path, "rain_mix.jld"), "mix2Exp", mix_allE); +@time "FitMLE RR" mix_allE = fit_mle_RR.(data_stations_z, K, local_order, mix₀=StochasticWeatherGenerators.mix_ini(T)); nothing #hide ```` @@ -533,62 +533,52 @@ nothing #hide make_range(y, step=1) = range(extrema(y)..., step=step) begin - dw_dry = 1 # dry - p_spell_dry = [plot(ylims=(1e-4, 1e-0), ytickfontsize=9, xtickfontsize=10) for j = 1:D] + dry_or_wet = 1 # dry + p_spell_dry = [plot(ylims=(1e-4, 1e-0), tickfont=11, legendfontsize=13) for j = 1:D] for j = 1:D - all_spells = len_spell_simu[:, j, dw_dry] - errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:grey, legend = :topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:probability, bins=make_range(reduce(vcat,all_spells)), errortype = :percentile, percentiles = [0,100], fillalpha = 0.4, centertype = :median) + all_spells = len_spell_simu[:, j, dry_or_wet] + errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:grey, legend=:topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:probability, bins=make_range(reduce(vcat, all_spells)), errortype=:percentile, percentiles=[0, 100], fillalpha=0.4, centertype=:median) - errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:probability, bins=make_range(reduce(vcat,all_spells)), errortype = :percentile, percentiles = [25,75], fillalpha = 0.5, centertype = :median) + errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:probability, bins=make_range(reduce(vcat, all_spells)), errortype=:percentile, percentiles=[25, 75], fillalpha=0.5, centertype=:median) - histo_spell = len_spell_hist[j, dw_dry] - errorlinehist!(p_spell_dry[j], [histo_spell], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:probability, bins=make_range(histo_spell), errortype = :percentile) + histo_spell = len_spell_hist[j, dry_or_wet] + errorlinehist!(p_spell_dry[j], [histo_spell], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:probability, bins=make_range(histo_spell), errortype=:percentile) xlims!(p_spell_dry[j], 0, 2 + maximum(1.5maximum.(histo_spell))) yaxis!(:log10) end - [xlabel!(p_spell_dry[j], "Nb of days", xlabelfontsize=10) for j in staid_lat[6:10]] - [ylabel!(p_spell_dry[j], "PMF", ylabelfontsize=10) for j in staid_lat[[1, 6]]] + [xlabel!(p_spell_dry[j], "Nb of days", xlabelfontsize=12) for j in staid_lat[6:10]] + [ylabel!(p_spell_dry[j], "PMF", ylabelfontsize=12) for j in staid_lat[[1, 6]]] [title!(p_spell_dry[j], station_name[j], titlefontsize=13) for j = 1:D] - pall_spell_dry = plot(p_spell_dry[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5)) + pall_spell_dry = plot(p_spell_dry[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5), left_margin=0.5cm, bottom_margin=0.275cm) end ```` -````@example tuto_paper -savefig(joinpath(save_tuto_path,"spell_steppost_dry_c1.pdf")); -nothing #hide -```` - #### Wet spell ````@example tuto_paper begin - dw_dry = 2 # wet - p_spell_dry = [plot(ylims=(1e-4, 1e-0), ytickfontsize=9, xtickfontsize=10, legendfontsize = 11) for j = 1:D] + dry_or_wet = 2 # wet + p_spell_wet = [plot(ylims=(1e-4, 1e-0), tickfont=11, legendfontsize=13) for j = 1:D] for j = 1:D - all_spells = len_spell_simu[:, j, dw_dry] - errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:grey, legend = :topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:probability, bins=make_range(reduce(vcat,all_spells)), errortype = :percentile, percentiles = [0,100], fillalpha = 0.4, centertype = :median) + all_spells = len_spell_simu[:, j, dry_or_wet] + errorlinehist!(p_spell_wet[j], all_spells, groupcolor=:grey, legend=:topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:probability, bins=make_range(reduce(vcat, all_spells)), errortype=:percentile, percentiles=[0, 100], fillalpha=0.4, centertype=:median) - errorlinehist!(p_spell_dry[j], all_spells, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:probability, bins=make_range(reduce(vcat,all_spells)), errortype = :percentile, percentiles = [25,75], fillalpha = 0.5, centertype = :median) + errorlinehist!(p_spell_wet[j], all_spells, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:probability, bins=make_range(reduce(vcat, all_spells)), errortype=:percentile, percentiles=[25, 75], fillalpha=0.5, centertype=:median) - histo_spell = len_spell_hist[j, dw_dry] - errorlinehist!(p_spell_dry[j], [histo_spell], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:probability, bins=make_range(histo_spell), errortype = :percentile) - xlims!(p_spell_dry[j], 0, 2 + maximum(1.5maximum.(histo_spell))) + histo_spell = len_spell_hist[j, dry_or_wet] + errorlinehist!(p_spell_wet[j], [histo_spell], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:probability, bins=make_range(histo_spell), errortype=:percentile) + xlims!(p_spell_wet[j], 0, 2 + maximum(1.5maximum.(histo_spell))) yaxis!(:log10) end - [xlabel!(p_spell_dry[j], "Nb of days", xlabelfontsize=10) for j in staid_lat[6:10]] - [ylabel!(p_spell_dry[j], "PMF", ylabelfontsize=10) for j in staid_lat[[1, 6]]] - [title!(p_spell_dry[j], station_name[j], titlefontsize=13) for j = 1:D] - pall_spell_dry = plot(p_spell_dry[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5)) + [xlabel!(p_spell_wet[j], "Nb of days", xlabelfontsize=12) for j in staid_lat[6:10]] + [ylabel!(p_spell_wet[j], "PMF", ylabelfontsize=12) for j in staid_lat[[1, 6]]] + [title!(p_spell_wet[j], station_name[j], titlefontsize=13) for j = 1:D] + pall_spell_wet = plot(p_spell_wet[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5), left_margin=0.5cm, bottom_margin=0.275cm) end ```` -````@example tuto_paper -savefig(joinpath(save_tuto_path,"spell_steppost_wet_c1.pdf")); -nothing #hide -```` - ### Rain #### Interpretation: Mean Rain per weather regime $R > 0 \mid Z = k$. @@ -597,7 +587,7 @@ We plot the empirical (strictly) positive **mean** rain amounts per weather regi ````@example tuto_paper begin - p_rainpercat = [plot(ytickfontsize=12, xtickfontsize=12) for j = 1:D] + p_rainpercat = [plot(tickfont=12, ylabelfontsize=14, titlefontsize=14, legendfontsize=13) for j = 1:D] for j = 1:D df_j = @chain data_stations_z[j] begin dropmissing @@ -609,58 +599,48 @@ begin # Uncomment to see how the double exponential mixtures compare to the empirical data. # [plot!(p_rainpercat[j], 1:T, t -> conversion_factor * mean(mix_allE[j][k, t]), label=:none, c=my_color(k, K), lw=1.5, legend = :topleft) for k in 1:K] for k in 1:K - cycle_avg = replace(cyclic_moving_average(df_j[k].MEAN_RR, df_j[k].day, T, 15), 0=>missing) - @df df_j[k] plot!(p_rainpercat[j], 1:T, conversion_factor * cycle_avg, c=my_color(k, K), alpha = 1, label=islabel(j, staid_lat[[4]], L"Z = %$k"), lw=1.5) + cycle_avg = replace(cyclic_moving_average(df_j[k].MEAN_RR, df_j[k].day, T, 15), 0 => missing) + @df df_j[k] plot!(p_rainpercat[j], 1:T, conversion_factor * cycle_avg, c=my_color(k, K), alpha=1, label=islabel(j, staid_lat[[4]], L"Z = %$k"), lw=1.5) end ylims!(p_rainpercat[j], 0, Inf) end - [ylabel!(p_rainpercat[j], L"Rain ($\mathrm{mm/m}^2$)", ylabelfontsize=14) for j in staid_lat[[1, 6]]] + [ylabel!(p_rainpercat[j], L"Rain (mm/m$^2$)") for j in staid_lat[[1, 6]]] [xticks!( p_rainpercat[j], vcat(dayofyear_Leap.(Date.(2000, 1:12)), 366), - vcat(string.(first.(string.(monthabbr.(1:12))))), xtickfontsize=10 + vcat(string.(first.(string.(monthabbr.(1:12))))) ) for j in 1:D] - [title!(p_rainpercat[j], station_name[j], titlefontsize=14) for j = 1:D] - plt_rain_cat_mix = plot(p_rainpercat[staid_lat]..., size=(3000 / 2.05, 1000 / 1.85), layout=(2, 5), left_margin = 20px) + [title!(p_rainpercat[j], station_name[j]) for j = 1:D] + plt_rain_cat_mix = plot(p_rainpercat[staid_lat]..., size=(3000 / 2.2, 1000 / 1.5), layout=(2, 5), left_margin=25px) end ```` -````@example tuto_paper -savefig(plt_rain_cat_mix, joinpath(save_tuto_path,"mean_positive_rain_per_cat_from_mixture.pdf")); -nothing #hide -```` - #### Univariate Rain distributions Historical vs Nb simulations distribution ````@example tuto_paper begin - p_uniR = [plot(yaxis=:log10, ylims=(1e-4, 1e-0), ytickfontsize=9, xtickfontsize=10, legendfontsize = 11) for j = 1:D] + p_uniR = [plot(yaxis=:log10, ylims=(1e-4, 1e-0), tickfont=11, legendfontsize=13, titlefontsize=13) for j = 1:D] for j = 1:D dists_RR_positive_j = conversion_factor * [filter(!iszero, rs[j, :, i]) for i in 1:Nb] - errorlinehist!(p_uniR[j], dists_RR_positive_j, groupcolor=:grey, legend = :topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:pdf, errortype = :percentile, percentiles = [0,100], fillalpha = 0.4, centertype = :median) + errorlinehist!(p_uniR[j], dists_RR_positive_j, groupcolor=:grey, legend=:topright, label=islabel(j, staid_lat[[1]], L"Simu $q_{0,100}$"), norm=:pdf, errortype=:percentile, percentiles=[0, 100], fillalpha=0.4, centertype=:median) - errorlinehist!(p_uniR[j], dists_RR_positive_j, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:pdf, errortype = :percentile, percentiles = [25,75], fillalpha = 0.5, centertype = :median) + errorlinehist!(p_uniR[j], dists_RR_positive_j, groupcolor=:red, label=islabel(j, staid_lat[[1]], L"Simu $q_{25,75}$"), norm=:pdf, errortype=:percentile, percentiles=[25, 75], fillalpha=0.5, centertype=:median) - errorlinehist!(p_uniR[j], [conversion_factor * filter(!iszero, data_stations[j].RR)], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:pdf, errortype = :percentile) + errorlinehist!(p_uniR[j], [conversion_factor * filter(!iszero, data_stations[j].RR)], label=islabel(j, staid_lat[[1]], "Obs"), groupcolor=:blue, lw=1.5, norm=:pdf, errortype=:percentile) xlims!(p_uniR[j], 0.0, Inf) end - [plot!(p_uniR[j], xlabel=L"Rain (mm/m$^2$)", xlabelfontsize=10) for j in staid_lat[6:10]] - [plot!(p_uniR[j], ylabel="PDF", ylabelfontsize=10) for j in staid_lat[[1, 6]]] + [plot!(p_uniR[j], xlabel=L"Rain (mm/m$^2$)") for j in staid_lat[6:10]] + [plot!(p_uniR[j], ylabel="PDF") for j in staid_lat[[1, 6]]] - [title!(p_uniR[j], station_name[j], titlefontsize=13) for j = 1:D] + [title!(p_uniR[j], station_name[j]) for j = 1:D] - pall_R = plot(p_uniR[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5), bottom_margin = 3px) + pall_R = plot(p_uniR[staid_lat]..., size=(3000 / 2.5, 1000 / 1.5), layout=(2, 5), bottom_margin=11px, left_margin=15px) end ```` -````@example tuto_paper -savefig(pall_R, joinpath(save_tuto_path, "dist_R_positive.pdf")); -nothing #hide -```` - #### Monthly quantile amount ````@example tuto_paper @@ -673,31 +653,27 @@ month_rain_histo = [cum_monthly(rh[:, j], idx_all) for j in 1:D] qs = [0.9, 0.5, 0.1] @time "Plot monthly quantile" begin - p_month_RR = [scatter(ytickfontsize=11) for j = 1:D] + p_month_RR = [scatter(xtickfontsize=10, ytickfontsize=11, ylabelfontsize=12, legendfontsize = 12, foreground_color_legend=nothing) for j = 1:D] for j = 1:D for (α, per) in enumerate([[0, 100], [25, 75]]) for (cc, q) in enumerate(qs) - errorline!(p_month_RR[j], [quantile(month_rain_simu[j, i][:, m], q) * conversion_factor for m in 1:12, i in 1:Nb], label=:none, fillalpha=0.18 * α^2, centertype=:median, errortype=:percentile, percentiles=per, groupcolor=my_palette(length(qs))[cc]) + errorline!(p_month_RR[j], [quantile(month_rain_simu[j, i][:, m], q) * conversion_factor for m in 1:12, i in 1:Nb], label=(α == 1 ? islabel(j, 9,L"Simu $q_{%$(Int(q*100))}$") : :none), fillalpha=0.18 * α^2, centertype=:median, errortype=:percentile, percentiles=per, groupcolor=my_palette(length(qs))[cc]) end end for q in qs - scatter!(p_month_RR[j], m -> quantile(month_rain_histo[j][:, m], q) * conversion_factor, 1:12, label=:none, ms=2, c=:blue) - plot!(p_month_RR[j], m -> quantile(month_rain_histo[j][:, m], q) * conversion_factor, 1:12, label=:none, c=:blue) + scatter!(p_month_RR[j], m -> quantile(month_rain_histo[j][:, m], q) * conversion_factor, 1:12, label=(q == qs[1] ? islabel(j, 3,"Obs") : :none), legend = :topleft, ms=2.5, c=:blue) + plot!(p_month_RR[j], m -> quantile(month_rain_histo[j][:, m], q) * conversion_factor, 1:12, label=:none, c=:blue, lw=1.75) end - xticks!(p_month_RR[j], 1:12, string.(first.(monthabbr.(1:12))), xtickfontsize=8) + xticks!(p_month_RR[j], 1:12, string.(first.(monthabbr.(1:12)))) + ylims!(p_month_RR[j], 0, Inf) end - [ylabel!(p_month_RR[j], "Rain (mm/month)", ylabelfontsize=12) for j in staid_lat[[1, 6]]] + [ylabel!(p_month_RR[j], L"Rain (mm/m$^2$)") for j in staid_lat[[1, 6]]] [title!(p_month_RR[j], station_name[j], titlefontsize=12) for j = 1:D] - pall_month_RR = plot(p_month_RR[staid_lat]..., size=(1190, 500), layout=(2, 5)) + pall_month_RR = plot(p_month_RR[staid_lat]..., size=(1190, 500), layout=(2, 5), left_margin=19px) end ```` -````@example tuto_paper -savefigcrop(pall_month_RR, joinpath(save_tuto_path, "EDF_like_$(Nb)_simu_monthly_quantile_K_$(K)_d_$(𝐃𝐞𝐠)_m_$(local_order)")); -nothing #hide -```` - ### Correlations ##### Rain event dry/wet @@ -709,7 +685,7 @@ cor_bin_mean_simu = mean(cor(ys[:, :, i]) for i in 1:Nb); begin - plots_cor_bin = [plot(-0.1:0.1:0.8, -0.1:0.1:0.8, aspect_ratio=true, label=:none, xlabelfontsize=16, ylabelfontsize=16, xtickfontsize=10, ytickfontsize=10) for _ in 1:1] + plots_cor_bin = [plot(-0.1:0.1:0.8, -0.1:0.1:0.8, aspect_ratio=true, label=:none, xlabelfontsize=16, ylabelfontsize=16, tickfont=11, legendfontsize=13) for _ in 1:1] scatter!(plots_cor_bin[1], vec_triu(cor_bin_hist), vec_triu(cor_bin_mean_simu), label="Correlations", xlabel="Observations", ylabel="Simulations", c=2) [xlims!(plots_cor_bin[i], -0.1, 1) for i in 1:1] [ylims!(plots_cor_bin[i], -0.1, 1) for i in 1:1] @@ -718,11 +694,6 @@ begin end ```` -````@example tuto_paper -savefigcrop(plot_cor_bin, joinpath(save_tuto_path, "full_cor_binary_hist_vs_1000_mean_simu")); -nothing #hide -```` - The largest pair correlation error for rain occurence comes from the pair ````@example tuto_paper @@ -741,7 +712,7 @@ cor_mean_simu = mean(cor(rs[:, :, i]') for i in 1:Nb); corT_mean_simu = mean(corTail(rs[:, :, i]') for i in 1:Nb); begin - plots_cor = [plot(-0.1:0.1:0.8, -0.1:0.1:0.8, aspect_ratio=true, label=:none, xlabelfontsize=16, ylabelfontsize=16, xtickfontsize=10, ytickfontsize=10) for _ in 1:2] + plots_cor = [plot(-0.1:0.1:0.8, -0.1:0.1:0.8, aspect_ratio=true, label=:none, xlabelfontsize=16, ylabelfontsize=16, tickfont=11, legendfontsize=13) for _ in 1:2] scatter!(plots_cor[1], vec_triu(cor_hist), vec_triu(cor_mean_simu), label="Correlations", xlabel="Observations", ylabel="Simulations", c=2) annotate!(plots_cor[1], 0.3, 0.7, "MSE ≃ $(round(mean(abs2, vec_triu(cor_hist) - vec_triu(cor_mean_simu)), digits = 4))") @@ -750,16 +721,10 @@ begin [xlims!(plots_cor[i], -0.1, 1) for i in 1:2] [ylims!(plots_cor[i], -0.1, 1) for i in 1:2] - plot(plots_cor...) + plot(plots_cor..., size=(800, 400), left_margin=15px) end ```` -````@example tuto_paper -savefigcrop(plots_cor[1], joinpath(save_tuto_path, "full_cor_hist_vs_1000_mean_simu")); -savefigcrop(plots_cor[2], joinpath(save_tuto_path, "full_corT_hist_vs_1000_mean_simu")); -nothing #hide -```` - The largest pair correlation error for rain (zero and non zero amounts) comes from the pair ````@example tuto_paper @@ -768,41 +733,38 @@ println("$(station_name[findmax(cor_hist - cor_mean_simu)[2][1]]) and $(station_ ##### Gaussian copula hypothesis -For a pair of stations, we transform the marginal $R_s>0$ to $\mathcal{N}(0,1)$. We compare the obtained bi-variate Normal distribution to the theoretical $\chi(2)$-distriubtion. +For a pair of stations, we transform the marginal $R_s>0$ to $\mathcal{N}(0,1)$. We compare the obtained bi-variate Normal distribution with the Mahalanobis distance to the theoretical $\chi^2(2)$-distriubtion. ````@example tuto_paper +corΣ = cov2cor.(Σ²RR) begin - j1 = 1 - j2 = 5 - plt_qqp_copula = plot(0:5, 0:5, aspect_ratio = :equal, legendfontsize = 14, c=:black) + j1 = 10 + j2 = 8 + plt_qqp_copula = plot(0:25, 0:25, aspect_ratio=:equal, legendfontsize=14, c=:black, label=:none, tickfont=12, ylabelfontsize=13, xlabelfontsize=13) + df_12 = leftjoin(data_stations_z[j1], data_stations_z[j2], on=:DATE, makeunique=true) + @subset!(df_12, :RR .> 0, :RR_1 .> 0) for k in 1:K - df_12 = leftjoin(data_stations_z[j1], data_stations_z[j2], on = :DATE, makeunique = true) - df_X = @chain df_12 begin - @subset(:RR .> 0, :RR_1 .> 0, :z .== k) - dropmissing - @aside cdf_ = ecdf(_.RR) - @aside cdf_1 = ecdf(_.RR_1) - @transform(:X = quantile(Normal(), cdf_(:RR)*length(:RR)/(1+length(:RR))), - :X_1 = quantile(Normal(), cdf_1(:RR_1)*length(:RR_1)/(1+length(:RR_1)))) - end - X = hcat(df_X.X, df_X.X_1) - Σ⁻¹ = inv(cov(X)) - X2 = [sqrt(x' * Σ⁻¹ * x) for x in eachrow(X)] |> sort - ecdfX2 = ecdf(X2)(X2)*length(X2)/(length(X2)+1) + df_X = @chain df_12 begin + @subset(:z .== k) + dropmissing + @aside u = StochasticWeatherGenerators.Copulas.pseudos(permutedims(hcat(_.RR, _.RR_1))) + @transform(:X = quantile(Normal(), u[1,:]), :X_1 = quantile(Normal(), u[2,:])) + end + X = hcat(df_X.X, df_X.X_1) + cor_sigma = [1 corΣ[k][j1,j2]; corΣ[k][j1,j2] 1] + Σ⁻¹ = inv(cor_sigma) - plot!(quantile(Chi(2),ecdfX2), X2, xlabel = L"$\chi (2)$-quantile",c = my_color(k, K), ylabel = "Observed Mahalanobis distance", label = L"Z = %$k ", legend = :topleft, lw = 2) -end + X2 = [(x' * Σ⁻¹ * x) for x in eachrow(X)] |> sort + ecdfX2 = ecdf(X2)(X2) * length(X2) / (length(X2) + 1) + + plot!(quantile(Chisq(2), ecdfX2), X2, xlabel=L"$\chi^2(2)$-quantile", c=my_color(k, K), ylabel="Observed squared Mahalanobis distance", label=L"Z = %$k ", legend=:topleft, lw=2) + end title!("$(station_name[j1]) vs $(station_name[j2])") - xlims!(0,4.5) - ylims!(0,4.5) + xlims!(0, 22) + ylims!(0, 22) end ```` -````@example tuto_paper -savefigcrop(plt_qqp_copula, joinpath(save_tuto_path, "qq_copula_$(station_name[j1])_$(station_name[j2])_Z_full")); -nothing #hide -```` - --- *This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* diff --git a/examples/tuto_paper.jl b/examples/tuto_paper.jl index 8fabb4f..fdb8877 100644 --- a/examples/tuto_paper.jl +++ b/examples/tuto_paper.jl @@ -6,10 +6,12 @@ md""" """ md""" -This tutorial describes the Stochastic Weather Generator described in the paper *Interpretable Seasonal Hidden Markov Model for spatio-temporal stochastic rain generation in France* by [Emmanuel Gobet](http://www.cmap.polytechnique.fr/~gobet/) (CMAP - École Polytechnique), [David Métivier](https://davidmetivier.mistea.inrae.fr/) (MISTEA -- INRAE) and [Sylvie Parey](https://fr.linkedin.com/in/sylvie-parey-60285194) (R&D -- EDF). -The paper is **available** on [HAL at this link](https://hal.inrae.fr/hal-04621349). -It provides a step-by-step construction of the Seasonal Hidden Markov Model (SHMM), the interpretation of the hidden states as Weather regimes over France and eventually the validation of the model with simulations. -![Schematic of the Seasonal Hidden Markov Model](https://github.com/dmetivie/StochasticWeatherGenerators.jl/assets/46794064/5fe1d677-877d-4fd5-83ac-29d30f728ca5) +This tutorial describes the numerical applications described in the paper [*Interpretable Seasonal Hidden Markov Model for spatio-temporal stochastic rain generation in France*](https://hal.inrae.fr/hal-04621349) by [Emmanuel Gobet](http://www.cmap.polytechnique.fr/~gobet/) (CMAP - École Polytechnique), [David Métivier](https://davidmetivier.mistea.inrae.fr/) (MISTEA -- INRAE) and [Sylvie Parey](https://fr.linkedin.com/in/sylvie-parey-60285194) (R&D -- EDF). +It shows a fully reproducible example on how to use the package `StochasticWeatherGenerators.jl` to reproduce, step-by-step, exactly (almost) all the figures of the paper. + +The paper describes the construction of a Stochastic Weather Generator with an Autoregressive Seasonal Hidden Markov Model (SHMM). The SHMM is trained with French weather stations, and the hidden states are interpreted as weather regimes. The model is validated with simulations, especially for its ability to reproduce extreme weather, e.g. droughts. +[Schematic of the Autoregressive Seasonal Hidden Markov Model](https://github.com/dmetivie/StochasticWeatherGenerators.jl/assets/46794064/5fe1d677-877d-4fd5-83ac-29d30f728ca5) +In the paper, the model is also used with Climate Change RCP scenarios (not shown here). """ md""" @@ -112,7 +114,7 @@ md""" Define the French area for map (Longitude and latitude) plot and the precision of the map `precision_scale` """ -precision_scale = "50m" +precision_scale = 50 # meter LON_min = -5 # West @@ -274,7 +276,7 @@ LON_idx = dms_to_dd.(station.LON) long_spell = [longuest_spell(y) for y in eachcol(Y)] -FR_map_spell = map_with_stations(LON_idx, LAT_idx, long_spell; station_name=station_name, show_value=true, colorbar_show=true) +FR_map_spell = map_with_stations(LON_idx, LAT_idx, long_spell; station_name=station_name, show_value=true, colorbar_show=true, precision_scale = precision_scale) #- savefigcrop(FR_map_spell, "FR_longest_dry_spell_$(D)_station_histo", save_tuto_path); #src @@ -401,7 +403,7 @@ $\mathbb{P}(Y = \text{Rain}\mid Z = k, H = h)$ with `h = memory_past_cat` For now there are some scale rendering issues due to an [GeoMakie.jl issue](https://github.com/MakieOrg/GeoMakie.jl/issues/268) so it might be tiny. """ -p_FR_map_mean_prob = map_with_stations(LON_idx, LAT_idx, [[mean(succprob.(hmm_fit.B[k, :, j, memory_past_cat])) for j in 1:length(STAID)] for k in 1:K], colorbar_show=true, colorbar_title = L"\mathbb{P}(Y = \text{Rain}\mid Z = k, H = 1)") +p_FR_map_mean_prob = map_with_stations(LON_idx, LAT_idx, [[mean(succprob.(hmm_fit.B[k, :, j, memory_past_cat])) for j in 1:length(STAID)] for k in 1:K], colorbar_show=true, colorbar_title = L"\mathbb{P}(Y = \text{Rain}\mid Z = k, H = 1)", precision_scale = precision_scale) #- savefigcrop(p_FR_map_mean_prob, "FR_K_$(K)_d_$(𝐃𝐞𝐠)_m_$(local_order)_mean_proba_cat_1", save_tuto_path); #src diff --git a/examples/utilities_geo_makie_features.jl b/examples/utilities_geo_makie_features.jl index 3bbbd6f..f2238dd 100644 --- a/examples/utilities_geo_makie_features.jl +++ b/examples/utilities_geo_makie_features.jl @@ -25,7 +25,7 @@ function map_with_stations(LON_idx, LAT_idx, value=:coral; station_name=nothing, borders_ne = naturalearth("admin_0_boundary_lines_land", precision_scale) coastlines_ne = naturalearth("coastline", precision_scale) - rivers_ne = naturalearth("rivers_lake_centerlines_scale_rank", 10) + rivers_ne = naturalearth("rivers_lake_centerlines_scale_rank", precision_scale) ocean_ne = naturalearth("ocean", precision_scale) fig = GeoMakie.with_theme(GeoMakie.theme_latexfonts(), fontsize=fontsize) do @@ -84,11 +84,11 @@ function map_with_stations(LON_idx, LAT_idx, value::AbstractArray{V}; station_na borders_ne = naturalearth("admin_0_boundary_lines_land", precision_scale) coastlines_ne = naturalearth("coastline", precision_scale) - rivers_ne = naturalearth("rivers_lake_centerlines_scale_rank", 10) + rivers_ne = naturalearth("rivers_lake_centerlines_scale_rank", precision_scale) ocean_ne = naturalearth("ocean", precision_scale) fig = GeoMakie.with_theme(GeoMakie.theme_latexfonts(), fontsize=fontsize) do - fig = GeoMakie.Figure() + fig = GeoMakie.Figure(size=(800,250)) ax = [GeoMakie.GeoAxis(fig[1:3, k], dest="+proj=merc", xgridvisible=false, ygridvisible=false, xticklabelsvisible=true, yticklabelsvisible=true, xticksvisible=false, yticksvisible=false, title = L"Z = %$k") for k in 1:K] for k in 1:K GeoMakie.xlims!(ax[k], LON_min, LON_max) @@ -98,7 +98,7 @@ function map_with_stations(LON_idx, LAT_idx, value::AbstractArray{V}; station_na GeoMakie.lines!(ax[k], GeoMakie.to_multilinestring.(coastlines_ne.geometry); color=:gray, linewidth=0.75) GeoMakie.lines!(ax[k], GeoMakie.to_multilinestring.(rivers_ne.geometry); linewidth=0.5) - sc = GeoMakie.scatter!(ax[k], LON_idx, LAT_idx; color=value[k], markersize=10, colormap=GeoMakie.Reverse(:plasma), colorrange=(0, 1)) + sc = GeoMakie.scatter!(ax[k], LON_idx, LAT_idx; color=value[k], markersize=11, colormap=GeoMakie.Reverse(:plasma), colorrange=(0, 1)) if show_value == true for i in eachindex(station_name) if i == 7 @@ -123,7 +123,7 @@ function map_with_stations(LON_idx, LAT_idx, value::AbstractArray{V}; station_na end end if colorbar_show == true && k == K - GeoMakie.Colorbar(fig[:, K+1], sc, height=GeoMakie.Relative(0.33), label = colorbar_title) + GeoMakie.Colorbar(fig[:, K+1], sc, height=GeoMakie.Relative(1), label = colorbar_title) GeoMakie.colgap!(fig.layout, -7) end k > 1 && GeoMakie.hideydecorations!(ax[k], ticks = false)