feat: make fou estimator more flexible & add copula analysis module

rust-dd · Jan 1, 2025 · c1edce3 · c1edce3
1 parent 81098d6
commit c1edce3
Show file tree

Hide file tree

Showing 5 changed files with 360 additions and 22 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,5 @@ flamegraph.svg
 *.env
 venv
 CI.yml
+/test
+.idea/
diff --git a/src/main.rs b/src/main.rs
@@ -1,29 +1,70 @@
-use stochastic_rs::stochastic::{noise::fgn::FGN, Sampling};
+use std::error::Error;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
 
-fn main() {
-  let fbm = FGN::new(0.9, 10000, None, Some(10000));
+use stochastic_rs::stats::fou_estimator::{
+  FOUParameterEstimationV1, FOUParameterEstimationV2, FilterType,
+};
+use stochastic_rs::stats::fd::FractalDim;
 
-  let start = std::time::Instant::now();
-  for _ in 0..10000 {
-    let _ = fbm.sample();
+// Import your estimator modules and types here
+// use your_crate::{FOUParameterEstimationV1, FOUParameterEstimationV2, FilterType};
+
+fn main() -> Result<(), Box<dyn Error>> {
+  // File paths
+  let paths = vec![
+    "./test/kecskekut_original.txt",
+    "./test/kecskekut_sim.txt",
+    "./test/kecskekut_without_jumps.txt",
+    "./test/komlos_original.txt",
+    "./test/komlos_sim.txt",
+    "./test/komlos_without_jumps.txt",
+  ];
+
+  // Process other datasets
+  for path in paths {
+    println!("\nProcessing {}", path);
+    let data = read_vector_from_file(path)?;
+
+    // V1 Estimation
+    let mut estimator_v1 =
+      FOUParameterEstimationV1::new(data.clone().into(), FilterType::Daubechies);
+    let hurst = FractalDim::new(data.clone().into());
+    let hurst = hurst.higuchi_fd(10);
+    estimator_v1.set_hurst(2. - hurst);
+    let (hurst_v1, sigma_v1, mu_v1, theta_v1) = estimator_v1.estimate_parameters();
+    println!("V1 - Estimated Parameters for {}:", path);
+    println!("  Hurst exponent: {}", hurst_v1);
+    println!("  Sigma: {}", sigma_v1);
+    println!("  Mu: {}", mu_v1);
+    println!("  Theta: {}", theta_v1);
+
+    // V2 Estimation
+    let delta = 1.0 / 256.0; // Adjust as necessary
+    let n = data.len();
+    let mut estimator_v2 = FOUParameterEstimationV2::new(data.clone().into(), delta, n);
+    estimator_v2.set_hurst(2. - hurst);
+    let (hurst_v2, sigma_v2, mu_v2, theta_v2) = estimator_v2.estimate_parameters();
+    println!("V1 - Estimated Parameters for {}:", path);
+    println!("  Hurst exponent: {}", hurst_v2);
+    println!("  Sigma: {}", sigma_v2);
+    println!("  Mu: {}", mu_v2);
+    println!("  Theta: {}", theta_v2);
   }
 
-  let duration = start.elapsed();
-  println!(
-    "Time elapsed in expensive_function() is: {:?}",
-    duration.as_secs_f32()
-  );
+  Ok(())
+}
 
-  let fbm = FGN::new(0.9, 10000, None, Some(10000));
+fn read_vector_from_file(filename: &str) -> Result<Vec<f64>, Box<dyn Error>> {
+  let file = File::open(filename)?;
+  let reader = BufReader::new(file);
+  let mut data = Vec::new();
 
-  let start = std::time::Instant::now();
-  for _ in 0..10000 {
-    let _ = fbm.sample();
+  for line in reader.lines() {
+    let line = line?;
+    let value: f64 = line.trim().parse()?;
+    data.push(value);
   }
 
-  let duration = start.elapsed();
-  println!(
-    "Time elapsed in expensive_function() is: {:?}",
-    duration.as_secs_f32()
-  );
+  Ok(data)
 }
diff --git a/src/stats.rs b/src/stats.rs
@@ -1,4 +1,5 @@
 pub mod cir;
+pub mod copula;
 pub mod double_exp;
 pub mod fd;
 pub mod fou_estimator;

diff --git a/src/stats/copula.rs b/src/stats/copula.rs
@@ -0,0 +1,280 @@
+use ndarray::{Array, Array1, Array2};
+use plotly::{Plot, Scatter};
+use rand::prelude::*;
+use rand_distr::Uniform;
+use statrs::distribution::{ContinuousCDF, MultivariateNormal, Normal};
+
+/// ======================================================
+/// 1) Empirical Copula (Ranking)
+/// ======================================================
+pub fn empirical_copula(x: &Array1<f64>, y: &Array1<f64>) -> (Array1<f64>, Array1<f64>) {
+  // Sort indices for x
+  let mut idx_x = (0..x.len()).collect::<Vec<usize>>();
+  idx_x.sort_by(|&i, &j| x[i].partial_cmp(&x[j]).unwrap());
+  // Ranks for x
+  let mut rank_sx = Array::zeros(x.len());
+  for (i, &idx) in idx_x.iter().enumerate() {
+    rank_sx[idx] = (i + 1) as f64 / x.len() as f64;
+  }
+
+  // Sort indices for y
+  let mut idx_y = (0..y.len()).collect::<Vec<usize>>();
+  idx_y.sort_by(|&i, &j| y[i].partial_cmp(&y[j]).unwrap());
+  // Ranks for y
+  let mut rank_sy = Array::zeros(y.len());
+  for (i, &idx) in idx_y.iter().enumerate() {
+    rank_sy[idx] = (i + 1) as f64 / y.len() as f64;
+  }
+
+  (rank_sx, rank_sy)
+}
+
+/// ======================================================
+/// 2) Plot (Empirical) Copula
+/// ======================================================
+pub fn plot_copula(u: &Array1<f64>, v: &Array1<f64>, title: &str) {
+  let trace = Scatter::new(u.to_vec(), v.to_vec())
+    .mode(plotly::common::Mode::Markers)
+    .marker(plotly::common::Marker::new().size(4))
+    .name(title);
+
+  let mut plot = Plot::new();
+  plot.add_trace(trace);
+  plot.show();
+}
+
+/// ======================================================
+/// 3) Gaussian Copula
+/// ======================================================
+pub fn generate_gaussian_copula_sample(n: usize, rho: f64) -> (Array1<f64>, Array1<f64>) {
+  let mean = vec![0.0, 0.0];
+  let cov = vec![vec![1.0, rho], vec![rho, 1.0]];
+  let cov_flat = cov.into_iter().flatten().collect();
+
+  let mvn = MultivariateNormal::new(mean, cov_flat)
+    .expect("Invalid covariance matrix for MultivariateNormal.");
+
+  let mut rng = thread_rng();
+  let mut samples = Array2::<f64>::zeros((n, 2));
+
+  for i in 0..n {
+    let xy = mvn.sample(&mut rng);
+    samples[[i, 0]] = xy[0];
+    samples[[i, 1]] = xy[1];
+  }
+
+  // Transform each dimension by standard normal CDF to get U(0,1)
+  let standard_normal = Normal::new(0.0, 1.0).unwrap();
+  let cdf = |z: f64| standard_normal.cdf(z);
+
+  let u = samples.column(0).mapv(cdf);
+  let v = samples.column(1).mapv(cdf);
+
+  (u.to_owned(), v.to_owned())
+}
+
+/// ======================================================
+/// 4) Clayton Copula (Archimedean)
+/// ======================================================
+pub fn generate_clayton_copula_sample(n: usize, alpha: f64) -> (Array1<f64>, Array1<f64>) {
+  use rand_distr::{Exp, Gamma};
+  assert!(alpha > 0.0, "Clayton alpha must be > 0.");
+
+  let mut rng = thread_rng();
+  let gamma_dist = Gamma::new(1.0 / alpha, 1.0).unwrap();
+  let exp_dist = Exp::new(1.0).unwrap();
+
+  let mut u = Array1::<f64>::zeros(n);
+  let mut v = Array1::<f64>::zeros(n);
+
+  for i in 0..n {
+    let w = gamma_dist.sample(&mut rng);
+    let e1 = exp_dist.sample(&mut rng);
+    let e2 = exp_dist.sample(&mut rng);
+
+    // (1 + e1 / w)^(-1/alpha)
+    u[i] = (1.0 + e1 / w).powf(-1.0 / alpha);
+    v[i] = (1.0 + e2 / w).powf(-1.0 / alpha);
+  }
+  (u, v)
+}
+
+/// ======================================================
+/// 5) Gumbel Copula (Archimedean)
+/// ======================================================
+pub fn generate_gumbel_copula_sample(n: usize, alpha: f64) -> (Array1<f64>, Array1<f64>) {
+  use rand_distr::Exp;
+  assert!(alpha >= 1.0, "Gumbel alpha must be >= 1.0.");
+
+  let mut rng = thread_rng();
+  let exp_dist = Exp::new(1.0).unwrap();
+  let uniform_dist = Uniform::new(1e-15, 1.0 - 1e-15);
+
+  let mut u = Array1::<f64>::zeros(n);
+  let mut v = Array1::<f64>::zeros(n);
+
+  for i in 0..n {
+    let e1 = exp_dist.sample(&mut rng);
+    let e2 = exp_dist.sample(&mut rng);
+    // Avoid exact 0 or 1 for stable approx
+    let x = uniform_dist.sample(&mut rng);
+
+    let w_approx = e1 / (1.0 + x) + 1e-15; // ensure nonzero
+
+    let z1 = (e2 / w_approx as f64).powf(1.0 / alpha);
+    let z2 = (e1 / w_approx).powf(1.0 / alpha);
+
+    u[i] = (-z1).exp();
+    v[i] = (-z2).exp();
+  }
+  (u, v)
+}
+
+/// ======================================================
+/// 6) Frank Copula (Archimedean) - with clamping
+/// ======================================================
+pub fn generate_frank_copula_sample(n: usize, theta: f64) -> (Array1<f64>, Array1<f64>) {
+  assert!(theta != 0.0, "Frank copula parameter must be non-zero.");
+
+  let mut rng = thread_rng();
+  let uni = Uniform::new(1e-15, 1.0 - 1e-15);
+
+  let mut u = Array1::<f64>::zeros(n);
+  let mut v = Array1::<f64>::zeros(n);
+
+  for i in 0..n {
+    // Avoid exact 0 or 1 by sampling in (1e-15, 1 - 1e-15)
+    let uu = uni.sample(&mut rng);
+    let zz = uni.sample(&mut rng);
+    u[i] = uu;
+
+    let denom = 1.0 - (-theta * uu).exp();
+    let numerator = (1.0 - (-theta).exp()) * zz;
+    let mut inside = 1.0 - numerator / denom;
+
+    // If inside <= 0 => clamp to a small positive
+    if inside <= 1e-15 {
+      inside = 1e-15;
+    }
+
+    v[i] = -1.0 / theta * inside.ln();
+  }
+  (u, v)
+}
+
+/// ======================================================
+/// 7) Tests (including Empirical Copula)
+/// ======================================================
+#[cfg(test)]
+mod tests {
+  use super::*;
+  use approx::assert_abs_diff_eq;
+
+  const N: usize = 500; // sample size for tests
+
+  // ----------------------------------------------
+  // A) Direct Empirical Copula Test
+  // ----------------------------------------------
+  #[test]
+  fn test_empirical_copula_direct() {
+    let mut rng = thread_rng();
+    let uniform_dist = Uniform::new(0.0, 1.0);
+
+    let x_data = (0..N).map(|_| uniform_dist.sample(&mut rng)).collect();
+    let y_data = (0..N).map(|_| uniform_dist.sample(&mut rng)).collect();
+
+    let x_arr = Array1::from_vec(x_data);
+    let y_arr = Array1::from_vec(y_data);
+
+    // Build empirical copula
+    let (sx, sy) = empirical_copula(&x_arr, &y_arr);
+
+    // Check range
+    assert!(sx.iter().all(|&x| x >= 0.0 && x <= 1.0));
+    assert!(sy.iter().all(|&y| y >= 0.0 && y <= 1.0));
+
+    // Means near 0.5
+    assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1);
+    assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1);
+
+    // Plot
+    plot_copula(&sx, &sy, "Empirical Copula (Direct Uniform Data)");
+  }
+
+  // ----------------------------------------------
+  // B) Gaussian Copula Test
+  // ----------------------------------------------
+  #[test]
+  fn test_gaussian_copula() {
+    let rho = 0.7;
+    let (u, v) = generate_gaussian_copula_sample(N, rho);
+    let (sx, sy) = empirical_copula(&u, &v);
+
+    assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0));
+    assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0));
+
+    plot_copula(&sx, &sy, "Empirical Copula (Gaussian, rho=0.7)");
+
+    // Means ~ 0.5
+    assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1);
+    assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1);
+  }
+
+  // ----------------------------------------------
+  // C) Clayton Copula Test (Archimedean)
+  // ----------------------------------------------
+  #[test]
+  fn test_clayton_copula() {
+    let alpha = 1.5;
+    let (u, v) = generate_clayton_copula_sample(N, alpha);
+    let (sx, sy) = empirical_copula(&u, &v);
+
+    assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0));
+    assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0));
+
+    plot_copula(&sx, &sy, "Empirical Copula (Clayton, alpha=1.5)");
+
+    assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1);
+    assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1);
+  }
+
+  // ----------------------------------------------
+  // D) Gumbel Copula Test (Archimedean)
+  // ----------------------------------------------
+  #[test]
+  fn test_gumbel_copula() {
+    let alpha = 1.5; // Gumbel parameter >= 1
+    let (u, v) = generate_gumbel_copula_sample(N, alpha);
+    let (sx, sy) = empirical_copula(&u, &v);
+
+    assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0));
+    assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0));
+
+    plot_copula(&sx, &sy, "Empirical Copula (Gumbel, alpha=1.5)");
+
+    // Gumbel can have heavier tail dependence, so allow a bit more tolerance
+    assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.2);
+    assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.2);
+  }
+
+  // ----------------------------------------------
+  // E) Frank Copula Test (Archimedean)
+  // ----------------------------------------------
+  #[test]
+  fn test_frank_copula() {
+    let theta = 0.5;
+    let (u, v) = generate_frank_copula_sample(N, theta);
+    let (sx, sy) = empirical_copula(&u, &v);
+
+    // Check range only if you clamp inside the generator
+    // TODO: this test is failing
+    assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0));
+    assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0));
+
+    plot_copula(&sx, &sy, "Empirical Copula (Frank, theta=5.0)");
+
+    // Means ~ 0.5
+    assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1);
+    assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1);
+  }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,5 @@ flamegraph.svg @@
     *.env
     venv
     CI.yml
+    /test
+    .idea/