diff --git a/.gitignore b/.gitignore index 406d4e4..4323abb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ flamegraph.svg *.env venv CI.yml +/test +.idea/ \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 319c93f..e45a1e1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,70 @@ -use stochastic_rs::stochastic::{noise::fgn::FGN, Sampling}; +use std::error::Error; +use std::fs::File; +use std::io::{BufRead, BufReader}; -fn main() { - let fbm = FGN::new(0.9, 10000, None, Some(10000)); +use stochastic_rs::stats::fou_estimator::{ + FOUParameterEstimationV1, FOUParameterEstimationV2, FilterType, +}; +use stochastic_rs::stats::fd::FractalDim; - let start = std::time::Instant::now(); - for _ in 0..10000 { - let _ = fbm.sample(); +// Import your estimator modules and types here +// use your_crate::{FOUParameterEstimationV1, FOUParameterEstimationV2, FilterType}; + +fn main() -> Result<(), Box> { + // File paths + let paths = vec![ + "./test/kecskekut_original.txt", + "./test/kecskekut_sim.txt", + "./test/kecskekut_without_jumps.txt", + "./test/komlos_original.txt", + "./test/komlos_sim.txt", + "./test/komlos_without_jumps.txt", + ]; + + // Process other datasets + for path in paths { + println!("\nProcessing {}", path); + let data = read_vector_from_file(path)?; + + // V1 Estimation + let mut estimator_v1 = + FOUParameterEstimationV1::new(data.clone().into(), FilterType::Daubechies); + let hurst = FractalDim::new(data.clone().into()); + let hurst = hurst.higuchi_fd(10); + estimator_v1.set_hurst(2. - hurst); + let (hurst_v1, sigma_v1, mu_v1, theta_v1) = estimator_v1.estimate_parameters(); + println!("V1 - Estimated Parameters for {}:", path); + println!(" Hurst exponent: {}", hurst_v1); + println!(" Sigma: {}", sigma_v1); + println!(" Mu: {}", mu_v1); + println!(" Theta: {}", theta_v1); + + // V2 Estimation + let delta = 1.0 / 256.0; // Adjust as necessary + let n = data.len(); + let mut estimator_v2 = FOUParameterEstimationV2::new(data.clone().into(), delta, n); + estimator_v2.set_hurst(2. - hurst); + let (hurst_v2, sigma_v2, mu_v2, theta_v2) = estimator_v2.estimate_parameters(); + println!("V1 - Estimated Parameters for {}:", path); + println!(" Hurst exponent: {}", hurst_v2); + println!(" Sigma: {}", sigma_v2); + println!(" Mu: {}", mu_v2); + println!(" Theta: {}", theta_v2); } - let duration = start.elapsed(); - println!( - "Time elapsed in expensive_function() is: {:?}", - duration.as_secs_f32() - ); + Ok(()) +} - let fbm = FGN::new(0.9, 10000, None, Some(10000)); +fn read_vector_from_file(filename: &str) -> Result, Box> { + let file = File::open(filename)?; + let reader = BufReader::new(file); + let mut data = Vec::new(); - let start = std::time::Instant::now(); - for _ in 0..10000 { - let _ = fbm.sample(); + for line in reader.lines() { + let line = line?; + let value: f64 = line.trim().parse()?; + data.push(value); } - let duration = start.elapsed(); - println!( - "Time elapsed in expensive_function() is: {:?}", - duration.as_secs_f32() - ); + Ok(data) } diff --git a/src/stats.rs b/src/stats.rs index 75e445c..7733ffb 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,4 +1,5 @@ pub mod cir; +pub mod copula; pub mod double_exp; pub mod fd; pub mod fou_estimator; diff --git a/src/stats/copula.rs b/src/stats/copula.rs new file mode 100644 index 0000000..276fc1f --- /dev/null +++ b/src/stats/copula.rs @@ -0,0 +1,280 @@ +use ndarray::{Array, Array1, Array2}; +use plotly::{Plot, Scatter}; +use rand::prelude::*; +use rand_distr::Uniform; +use statrs::distribution::{ContinuousCDF, MultivariateNormal, Normal}; + +/// ====================================================== +/// 1) Empirical Copula (Ranking) +/// ====================================================== +pub fn empirical_copula(x: &Array1, y: &Array1) -> (Array1, Array1) { + // Sort indices for x + let mut idx_x = (0..x.len()).collect::>(); + idx_x.sort_by(|&i, &j| x[i].partial_cmp(&x[j]).unwrap()); + // Ranks for x + let mut rank_sx = Array::zeros(x.len()); + for (i, &idx) in idx_x.iter().enumerate() { + rank_sx[idx] = (i + 1) as f64 / x.len() as f64; + } + + // Sort indices for y + let mut idx_y = (0..y.len()).collect::>(); + idx_y.sort_by(|&i, &j| y[i].partial_cmp(&y[j]).unwrap()); + // Ranks for y + let mut rank_sy = Array::zeros(y.len()); + for (i, &idx) in idx_y.iter().enumerate() { + rank_sy[idx] = (i + 1) as f64 / y.len() as f64; + } + + (rank_sx, rank_sy) +} + +/// ====================================================== +/// 2) Plot (Empirical) Copula +/// ====================================================== +pub fn plot_copula(u: &Array1, v: &Array1, title: &str) { + let trace = Scatter::new(u.to_vec(), v.to_vec()) + .mode(plotly::common::Mode::Markers) + .marker(plotly::common::Marker::new().size(4)) + .name(title); + + let mut plot = Plot::new(); + plot.add_trace(trace); + plot.show(); +} + +/// ====================================================== +/// 3) Gaussian Copula +/// ====================================================== +pub fn generate_gaussian_copula_sample(n: usize, rho: f64) -> (Array1, Array1) { + let mean = vec![0.0, 0.0]; + let cov = vec![vec![1.0, rho], vec![rho, 1.0]]; + let cov_flat = cov.into_iter().flatten().collect(); + + let mvn = MultivariateNormal::new(mean, cov_flat) + .expect("Invalid covariance matrix for MultivariateNormal."); + + let mut rng = thread_rng(); + let mut samples = Array2::::zeros((n, 2)); + + for i in 0..n { + let xy = mvn.sample(&mut rng); + samples[[i, 0]] = xy[0]; + samples[[i, 1]] = xy[1]; + } + + // Transform each dimension by standard normal CDF to get U(0,1) + let standard_normal = Normal::new(0.0, 1.0).unwrap(); + let cdf = |z: f64| standard_normal.cdf(z); + + let u = samples.column(0).mapv(cdf); + let v = samples.column(1).mapv(cdf); + + (u.to_owned(), v.to_owned()) +} + +/// ====================================================== +/// 4) Clayton Copula (Archimedean) +/// ====================================================== +pub fn generate_clayton_copula_sample(n: usize, alpha: f64) -> (Array1, Array1) { + use rand_distr::{Exp, Gamma}; + assert!(alpha > 0.0, "Clayton alpha must be > 0."); + + let mut rng = thread_rng(); + let gamma_dist = Gamma::new(1.0 / alpha, 1.0).unwrap(); + let exp_dist = Exp::new(1.0).unwrap(); + + let mut u = Array1::::zeros(n); + let mut v = Array1::::zeros(n); + + for i in 0..n { + let w = gamma_dist.sample(&mut rng); + let e1 = exp_dist.sample(&mut rng); + let e2 = exp_dist.sample(&mut rng); + + // (1 + e1 / w)^(-1/alpha) + u[i] = (1.0 + e1 / w).powf(-1.0 / alpha); + v[i] = (1.0 + e2 / w).powf(-1.0 / alpha); + } + (u, v) +} + +/// ====================================================== +/// 5) Gumbel Copula (Archimedean) +/// ====================================================== +pub fn generate_gumbel_copula_sample(n: usize, alpha: f64) -> (Array1, Array1) { + use rand_distr::Exp; + assert!(alpha >= 1.0, "Gumbel alpha must be >= 1.0."); + + let mut rng = thread_rng(); + let exp_dist = Exp::new(1.0).unwrap(); + let uniform_dist = Uniform::new(1e-15, 1.0 - 1e-15); + + let mut u = Array1::::zeros(n); + let mut v = Array1::::zeros(n); + + for i in 0..n { + let e1 = exp_dist.sample(&mut rng); + let e2 = exp_dist.sample(&mut rng); + // Avoid exact 0 or 1 for stable approx + let x = uniform_dist.sample(&mut rng); + + let w_approx = e1 / (1.0 + x) + 1e-15; // ensure nonzero + + let z1 = (e2 / w_approx as f64).powf(1.0 / alpha); + let z2 = (e1 / w_approx).powf(1.0 / alpha); + + u[i] = (-z1).exp(); + v[i] = (-z2).exp(); + } + (u, v) +} + +/// ====================================================== +/// 6) Frank Copula (Archimedean) - with clamping +/// ====================================================== +pub fn generate_frank_copula_sample(n: usize, theta: f64) -> (Array1, Array1) { + assert!(theta != 0.0, "Frank copula parameter must be non-zero."); + + let mut rng = thread_rng(); + let uni = Uniform::new(1e-15, 1.0 - 1e-15); + + let mut u = Array1::::zeros(n); + let mut v = Array1::::zeros(n); + + for i in 0..n { + // Avoid exact 0 or 1 by sampling in (1e-15, 1 - 1e-15) + let uu = uni.sample(&mut rng); + let zz = uni.sample(&mut rng); + u[i] = uu; + + let denom = 1.0 - (-theta * uu).exp(); + let numerator = (1.0 - (-theta).exp()) * zz; + let mut inside = 1.0 - numerator / denom; + + // If inside <= 0 => clamp to a small positive + if inside <= 1e-15 { + inside = 1e-15; + } + + v[i] = -1.0 / theta * inside.ln(); + } + (u, v) +} + +/// ====================================================== +/// 7) Tests (including Empirical Copula) +/// ====================================================== +#[cfg(test)] +mod tests { + use super::*; + use approx::assert_abs_diff_eq; + + const N: usize = 500; // sample size for tests + + // ---------------------------------------------- + // A) Direct Empirical Copula Test + // ---------------------------------------------- + #[test] + fn test_empirical_copula_direct() { + let mut rng = thread_rng(); + let uniform_dist = Uniform::new(0.0, 1.0); + + let x_data = (0..N).map(|_| uniform_dist.sample(&mut rng)).collect(); + let y_data = (0..N).map(|_| uniform_dist.sample(&mut rng)).collect(); + + let x_arr = Array1::from_vec(x_data); + let y_arr = Array1::from_vec(y_data); + + // Build empirical copula + let (sx, sy) = empirical_copula(&x_arr, &y_arr); + + // Check range + assert!(sx.iter().all(|&x| x >= 0.0 && x <= 1.0)); + assert!(sy.iter().all(|&y| y >= 0.0 && y <= 1.0)); + + // Means near 0.5 + assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1); + assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1); + + // Plot + plot_copula(&sx, &sy, "Empirical Copula (Direct Uniform Data)"); + } + + // ---------------------------------------------- + // B) Gaussian Copula Test + // ---------------------------------------------- + #[test] + fn test_gaussian_copula() { + let rho = 0.7; + let (u, v) = generate_gaussian_copula_sample(N, rho); + let (sx, sy) = empirical_copula(&u, &v); + + assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0)); + assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0)); + + plot_copula(&sx, &sy, "Empirical Copula (Gaussian, rho=0.7)"); + + // Means ~ 0.5 + assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1); + assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1); + } + + // ---------------------------------------------- + // C) Clayton Copula Test (Archimedean) + // ---------------------------------------------- + #[test] + fn test_clayton_copula() { + let alpha = 1.5; + let (u, v) = generate_clayton_copula_sample(N, alpha); + let (sx, sy) = empirical_copula(&u, &v); + + assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0)); + assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0)); + + plot_copula(&sx, &sy, "Empirical Copula (Clayton, alpha=1.5)"); + + assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1); + assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1); + } + + // ---------------------------------------------- + // D) Gumbel Copula Test (Archimedean) + // ---------------------------------------------- + #[test] + fn test_gumbel_copula() { + let alpha = 1.5; // Gumbel parameter >= 1 + let (u, v) = generate_gumbel_copula_sample(N, alpha); + let (sx, sy) = empirical_copula(&u, &v); + + assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0)); + assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0)); + + plot_copula(&sx, &sy, "Empirical Copula (Gumbel, alpha=1.5)"); + + // Gumbel can have heavier tail dependence, so allow a bit more tolerance + assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.2); + assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.2); + } + + // ---------------------------------------------- + // E) Frank Copula Test (Archimedean) + // ---------------------------------------------- + #[test] + fn test_frank_copula() { + let theta = 0.5; + let (u, v) = generate_frank_copula_sample(N, theta); + let (sx, sy) = empirical_copula(&u, &v); + + // Check range only if you clamp inside the generator + // TODO: this test is failing + assert!(u.iter().all(|&x| x >= 0.0 && x <= 1.0)); + assert!(v.iter().all(|&y| y >= 0.0 && y <= 1.0)); + + plot_copula(&sx, &sy, "Empirical Copula (Frank, theta=5.0)"); + + // Means ~ 0.5 + assert_abs_diff_eq!(sx.mean().unwrap(), 0.5, epsilon = 0.1); + assert_abs_diff_eq!(sy.mean().unwrap(), 0.5, epsilon = 0.1); + } +} diff --git a/src/stats/fou_estimator.rs b/src/stats/fou_estimator.rs index b056362..ece28b9 100644 --- a/src/stats/fou_estimator.rs +++ b/src/stats/fou_estimator.rs @@ -31,7 +31,11 @@ pub enum FilterType { impl FOUParameterEstimationV1 { pub fn estimate_parameters(&mut self) -> (f64, f64, f64, f64) { self.linear_filter(); - self.hurst_estimator(); + + if self.hurst.is_none() { + self.hurst_estimator(); + } + self.sigma_estimator(); self.mu_estimator(); self.theta_estimator(); @@ -44,6 +48,10 @@ impl FOUParameterEstimationV1 { ) } + pub fn set_hurst(&mut self, hurst: f64) { + self.hurst = Some(hurst); + } + fn hurst_estimator(&mut self) { let hurst = 0.5 * ((self.V2 / self.V1).log2()); self.hurst = Some(hurst); @@ -177,7 +185,9 @@ pub struct FOUParameterEstimationV2 { impl FOUParameterEstimationV2 { pub fn estimate_parameters(&mut self) -> (f64, f64, f64, f64) { - self.hurst_estimator(); + if self.hurst.is_none() { + self.hurst_estimator(); + } self.sigma_estimator(); self.mu_estimator(); self.theta_estimator(); @@ -190,6 +200,10 @@ impl FOUParameterEstimationV2 { ) } + pub fn set_hurst(&mut self, hurst: f64) { + self.hurst = Some(hurst); + } + fn hurst_estimator(&mut self) { let X = &self.path; let N = self.series_length;