diff --git a/Project.toml b/Project.toml index cf12f5f..b2d3ca5 100644 --- a/Project.toml +++ b/Project.toml @@ -13,14 +13,15 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Geodesy = "0ef565a4-170c-5f04-8de2-149903a85f3d" Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NearestCorrelationMatrix = "59ddf330-608c-4938-8bc9-a4ee97bbbea6" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RollingFunctions = "b0e4dd01-7b14-53d8-9b45-175a3e362653" SmoothPeriodicStatsModels = "3d8e1505-165e-443f-a184-83a230b5a9e5" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -julia = "1" SmoothPeriodicStatsModels = "1" +julia = "1" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/examples/tuto_paper.jl b/examples/tuto_paper.jl index 256fa3f..bbd6597 100644 --- a/examples/tuto_paper.jl +++ b/examples/tuto_paper.jl @@ -500,7 +500,8 @@ We fit a Gaussian copula to each pair of stations for joint rainy days only. """ #!nb # !!! warning #!nb # For some hidden states corresponding to dry weather, it might happen that for some pair of stations, there are not enough simultaneous rain occurrences in a rain category $Z = k$ to estimate a correlation coefficient. -#!nb # In that case a `missing` coefficient is returned by `cov_RR`. +#!nb # In that case a `missing` coefficient is returned by `cov_RR` or it returns the value `impute_missing` if specified (to avoid missing). +#!nb # The option `force_PosDef` (enabled by default) ensures having positive definite matrix. This is necessary to use gaussian copula. Σ²RR = cov_RR(data_stations_z, K) diff --git a/src/StochasticWeatherGenerators.jl b/src/StochasticWeatherGenerators.jl index 74342d6..42131d6 100644 --- a/src/StochasticWeatherGenerators.jl +++ b/src/StochasticWeatherGenerators.jl @@ -7,6 +7,7 @@ using DataFrames, DataFramesMeta, Dates using CSV, Printf # File Read/Load using LinearAlgebra: tril, I using Copulas # for correlated generation +using NearestCorrelationMatrix: nearest_cor! using RollingFunctions # rollmean for climate indices using SmoothPeriodicStatsModels: αₜ, σₜ, μₜ, ρₜ using SmoothPeriodicStatsModels: AR1, model_for_loglikelihood_AR1, initialvalue_optimize! @@ -40,7 +41,7 @@ include("temperature/correlations.jl") include("climate_indices.jl") # ## Rain -export rand_RR, fit_mle_RR, cov_RR, fit_mle_stations +export rand_RR, fit_mle_RR, cov_RR, cor_RR, fit_mle_stations export joint_rain, Σ_Spearman2Pearson, Σ_Kendall2Pearson, corTail # ## AR1 diff --git a/src/rain/correlations.jl b/src/rain/correlations.jl index 00ebee4..cc34912 100644 --- a/src/rain/correlations.jl +++ b/src/rain/correlations.jl @@ -52,9 +52,9 @@ function corTail(x, q=0.95) return (c + c') / 2 end -#TODO: a version without K +#TODO: is NearestCorrelationMatrix.jl the best way to enforce positivity? Uncertainty in estimation could be interesting (coefficients we trust or not). """ - cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pearson, force_PosDef = true) + cor_RR(dfs::AbstractArray{<:DataFrame}[, K]; cor_method=Σ_Spearman2Pearson, force_PosDef = true) Compute the (strictly positive) rain pair correlations `cor(Rs₁ > 0, Rs₂ > 0)` between each pair of stations `s₁, s₂` for each hidden state `Z = k`. Input: a array `dfs` of `df::DataFrame` of length `S` (number of station) where each `df` have :DATE, :RR, :z (same :z for each df). @@ -63,7 +63,7 @@ Output: `K` correlation matrix of size `S×S` Options: -- `force_PosDef` will enforce Positive Definite matrix with `sqrt.(ΣS_k.^2)`. +- `force_PosDef` will enforce Positive Definite matrix with [NearestCorrelationMatrix.jl](https://github.com/adknudson/NearestCorrelationMatrix.jl). - `cor_method`: typically `Σ_Spearman2Pearson` or `Σ_Kendall2Pearson` - `impute_missing`: if `nothing`, `missing` will be outputted when two stations do not have at least two rain days in common. Otherwise the value `impute_missing` will be set. @@ -86,7 +86,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pears if all([all((!ismissing).(S)) for S in ΣS_k]) # are they no missing? ΣS_k = convert.(Matrix{Float64}, ΣS_k) if force_PosDef - ΣS_k = sqrt.(ΣS_k .^ 2) + ΣS_k = nearest_cor!.(ΣS_k) end else for k in 1:K @@ -100,7 +100,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pears end """ - cov_RR(dfs::AbstractArray{<:DataFrame}, K; cor_method=Σ_Spearman2Pearson, force_PosDef = true) + cov_RR(dfs::AbstractArray{<:DataFrame}[, K]; cor_method=Σ_Spearman2Pearson, force_PosDef = true) Compute the (strictly positive) rain pair covariance `cov(Rs₁ > 0, Rs₂ > 0)` between each pair of stations `s₁, s₂` for each hidden state `Z = k`. Input: a array `dfs` of `df::DataFrame` of length `S` (number of station) where each `df` have :DATE, :RR, :z (same :z for each df). @@ -109,7 +109,7 @@ Output: `K` covariance matrix of size `S×S` Options: -- `force_PosDef` will enforce Positive Definite matrix with `sqrt.(ΣS_k.^2)`. +- `force_PosDef` will enforce Positive Definite matrix with [NearestCorrelationMatrix.jl](https://github.com/adknudson/NearestCorrelationMatrix.jl). - `cor_method`: typically `Σ_Spearman2Pearson` or `Σ_Kendall2Pearson` - `impute_missing`: if `nothing`, `missing` will be outputted when two stations do not have at least two rain days in common. Otherwise the value `impute_missing` will be set. @@ -135,7 +135,7 @@ function cor_RR(dfs::AbstractArray{<:DataFrame}; cor_method=Σ_Spearman2Pearson, if all((!ismissing).(ΣS)) # are they no missing? ΣS = convert(Matrix{Float64}, ΣS) if force_PosDef - ΣS = sqrt(ΣS^2) + nearest_cor!(ΣS) end else aremissing = findall(ismissing, ΣS)