From 9b73eaab697097e44c434a983f3aed238fce1e2c Mon Sep 17 00:00:00 2001
From: peter cotton
Date: Tue, 3 Dec 2024 17:20:17 -0500
Subject: [PATCH] blah

---
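Note for reviewers: the functional change in this patch renames the data argument of
ema_scov from x= to y= and updates its call sites and tests to match. A minimal usage
sketch of the renamed keyword follows (illustrative only; the import path, the r=0.025
default and the state keys 'mean' and 'pcov' are taken from the hunks below, while the
random data and the dimension are made up):

    # Sketch: running covariance with the renamed keyword
    import numpy as np
    from precise.skaters.covariance.ewaempfactory import ema_scov, ewa_emp_pcov_factory

    s = {}                                  # empty state; ema_scov initializes it on first use
    for _ in range(50):
        y = np.random.randn(3)              # one observation of a 3-dimensional series
        s = ema_scov(s=s, y=y, r=0.025)     # y= replaces the old x= keyword
    print(s['mean'])                        # running mean
    print(s['pcov'])                        # running population covariance

    # The skater-style wrapper keeps the (x, x_cov, s) return convention:
    x, x_cov, s2 = ewa_emp_pcov_factory(y=np.random.randn(3), s={})
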
 .github/workflows/deploy.yml                  |  2 +-
 ...nning_covariance_with_dictionary_inputs.py |  4 +--
 precise/skaters/covariance/ewaempfactory.py   | 25 ++++++++--------
 precise/skaters/covariance/ewalwfactory.py    |  2 +-
 precise/skaters/covariance/ewalzfactory.py    |  2 +-
 precise/skaters/covariance/ewapmfactory.py    |  8 ++---
 precise/skaters/covarianceutil/conventions.py |  2 +-
 .../skaters/covarianceutil/covfunctions.py    |  8 ++---
 precise/skaters/locationutil/hubermean.py     | 30 +++++++++----------
 .../skaters/locationutil/vectorfunctions.py   |  2 +-
 precise/skaters/managerutil/ratcheting.py     |  2 +-
 tests/covariance/test_ema.py                  |  6 ++--
 tests/scalar/test_huber_asym.py               |  4 +--
 .../dictionaries/debug_skatercovariance.py    |  4 +--
 14 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 00be4771..98a8a0be 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -14,7 +14,7 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: '3.x'
+        python-version: '3.x'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/examples_dictionary_usage/running_covariance_with_dictionary_inputs.py b/examples_dictionary_usage/running_covariance_with_dictionary_inputs.py
index 579d34cf..9f7c8a92 100644
--- a/examples_dictionary_usage/running_covariance_with_dictionary_inputs.py
+++ b/examples_dictionary_usage/running_covariance_with_dictionary_inputs.py
@@ -60,12 +60,12 @@ def f(y, s, k=1):
 ]
 
 # Update with data points
 for x in data_stream:
-    print({'x':x})
+    print({'y':x})
     skater_cov.update(x=x, f=f)
 
 # Second lap
 for x in data_stream:
-    print({'x': x})
+    print({'y': x})
     skater_cov.update(x=x,f=f)
 
 # Retrieve correlation matrix
diff --git a/precise/skaters/covariance/ewaempfactory.py b/precise/skaters/covariance/ewaempfactory.py
index aa8d4da9..6d0934e2 100644
--- a/precise/skaters/covariance/ewaempfactory.py
+++ b/precise/skaters/covariance/ewaempfactory.py
@@ -3,28 +3,28 @@
 import math
 from typing import Union, List
 
-# Exponential weighted sample covariancecomparisonutil
+# Exponential weighted sample covariance
 
 
 def ewa_emp_pcov_factory(y, s:dict, k=1, r=0.025, n_emp=None, e=1):
     assert k==1
-    s = ema_scov(s=s,x=y,r=r, n_emp=n_emp)
+    s = ema_scov(s=s, y=y, r=r, n_emp=n_emp)
     x = s['mean']
     x_cov = s['pcov']
     return x, x_cov, s
 
 
-def ema_scov(s:dict, x:Union[List[float], int]=None, r:float=0.025, n_emp=None):
-    """ Maintain running population covariancecomparisonutil """
+def ema_scov(s:dict, y:Union[List[float], int]=None, r:float=0.025, n_emp=None):
+    """ Maintain running population covariance """
     if s.get('n_samples') is None:
-        if isinstance(x,int):
-            return _ema_scov_init(n_dim=x,r=r, n_emp=n_emp)
-        elif isinstance(x,(List,np.ndarray)):
-            s = _ema_scov_init(n_dim=len(x),r=r, n_emp=n_emp)
+        if isinstance(y, int):
+            return _ema_scov_init(n_dim=y, r=r, n_emp=n_emp)
+        elif isinstance(y, (List, np.ndarray)):
+            s = _ema_scov_init(n_dim=len(y), r=r, n_emp=n_emp)
         else:
-            raise ValueError('Not sure how to initialize EWA COV tracker. Supply x=5 say, for 5 dim')
-    if x is not None:
-        s = _ema_scov_update(s=s, x=x, r=r)
+            raise ValueError('Not sure how to initialize EWA COV tracker. Supply y=5 say, for 5 dim')
+    if y is not None:
+        s = _ema_scov_update(s=s, x=y, r=r)
     return s
 
 
@@ -50,9 +50,10 @@ def _ema_scov_update(s:dict, x:[float], r:float=None, target=None, y=None):
         If target is not None, it will be used in place of the mean when updating
         Obviously, this changes the interpretation of 'scov'
 
-        xt - transpose of x
+        xt - transpose of x
     """
+    x = np.array(x)
     if s['n_samples']< s['n_emp']:
         # Use the regular cov update for a burn-in period
         # During this time both scov and pcov are maintained
diff --git a/precise/skaters/covariance/ewalwfactory.py b/precise/skaters/covariance/ewalwfactory.py
index a3574846..4900d0b8 100644
--- a/precise/skaters/covariance/ewalwfactory.py
+++ b/precise/skaters/covariance/ewalwfactory.py
@@ -46,7 +46,7 @@ def _lw_ema_scov_update(s, x, r):
     """
     # Uses buffered LD up to 2*n_emp observations, then switches to an updating scheme
     x = np.asarray(x)
-    s['ema_scov'] = ema_scov(s=s['ema_scov'], x=x, r=r)
+    s['ema_scov'] = ema_scov(s=s['ema_scov'], y=x, r=r)
     s['buffer'].append(x)
     if len(s['buffer'])>s['ema_scov']['n_emp']:
         # Update running estimate of the LD shrinkage parameter
diff --git a/precise/skaters/covariance/ewalzfactory.py b/precise/skaters/covariance/ewalzfactory.py
index 2fd2099a..0d7bd41d 100644
--- a/precise/skaters/covariance/ewalzfactory.py
+++ b/precise/skaters/covariance/ewalzfactory.py
@@ -91,7 +91,7 @@ def lz_factory(y, s:dict, n_epoch=DEFAULT_N_EPOCH,
     :param adj_func:         Takes pre matrix and returns binary of same shape (where non-zeros should go)
     :param local_pre_func:   Function to take local cov matrices and produce precision matrices
     :param global_cov_func:  Function to take global precision matrix and produce global cov matrix
-    :return: x, x_cov, s
+    :return: x, x_cov, s
     """
 
     # Mildly tedious stuff...
diff --git a/precise/skaters/covariance/ewapmfactory.py b/precise/skaters/covariance/ewapmfactory.py
index a07ae833..514ba400 100644
--- a/precise/skaters/covariance/ewapmfactory.py
+++ b/precise/skaters/covariance/ewapmfactory.py
@@ -7,7 +7,7 @@
 
 # If no target is supplied, either initially or for the update call, then a running mean will be used.
 
-QUADRANTS = {'cu':(1.0,1,1), # x*1 > 0 y*1 > 0
+QUADRANTS = {'cu':(1.0,1,1), # x*1 > 0 y*1 > 0
              'du':(-1.0,-1,1),
              'dl':(-1.0,1,-1),
              'cl':(1.0,-1,-1)}
@@ -30,7 +30,7 @@ def partial_ema_scov(s:dict, x:Union[List[float], int]=None, r:float=0.025, targ
     elif len(x)>1:
         s = _partial_ema_scov_init(n_dim=len(x), r=r, target=target, n_emp=n_emp)
     else:
-        raise ValueError('Not sure how to initialize EWA COV tracker. Supply x=5 say, for 5 dim')
+        raise ValueError('Not sure how to initialize EWA COV tracker. Supply x=5 say, for 5 dim')
     if x is not None:
         s = _partial_ema_scov_update(s=s, x=x, r=r)
     return s
@@ -71,8 +71,8 @@ def _partial_ema_scov_update(s:dict, x:[float], r:float=None, target=None):
     # Update running partial scatter estimates
     for q,(w,sgn1,sgn2) in QUADRANTS.items():
         # Morally:
-        #     x1 = max(0, (x-target)*sgn1) * sgn1
-        #     x2 = (np.max(0, (x-target)*sgn2) * sgn2) if sgn1!=sgn2 else x1
+        #     x1 = max(0, (x-target)*sgn1) * sgn1
+        #     x2 = (np.max(0, (x-target)*sgn2) * sgn2) if sgn1!=sgn2 else x1
         x1 = (x-target)*sgn1
         x2 = (x-target)*sgn2
         x1[x1<0]=0
diff --git a/precise/skaters/covarianceutil/conventions.py b/precise/skaters/covarianceutil/conventions.py
index 75f9d00c..02d552cd 100644
--- a/precise/skaters/covarianceutil/conventions.py
+++ b/precise/skaters/covarianceutil/conventions.py
@@ -20,7 +20,7 @@ def infer_dimension(n_dim:int=None, x:X_TYPE=None, **ignore)->int:
     elif len(x)>1:
         return len(x)
     else:
-        raise ValueError('Ambiguity in number of variables. Try supplying x or n_dim')
+        raise ValueError('Ambiguity in number of variables. Try supplying x or n_dim')
 
 
 def is_data(x):
diff --git a/precise/skaters/covarianceutil/covfunctions.py b/precise/skaters/covarianceutil/covfunctions.py
index 82c5c832..b33168dc 100644
--- a/precise/skaters/covarianceutil/covfunctions.py
+++ b/precise/skaters/covarianceutil/covfunctions.py
@@ -354,9 +354,9 @@ def _schur_complement_direct(A, B, C, D, gamma):
 
 
 def inverse_multiply(a, b, warn=False, throw=False):
-    # Want x = a^{-1} b
-    #      a x = b
-    #      x = solve(a,b)
+    # Want x = a^{-1} b
+    #      a x = b
+    #      x = solve(a,b)
     x = np.linalg.solve(a, b)
     if (warn or throw):
         if np.linalg.matrix_rank(x)
diff --git a/precise/skaters/locationutil/hubermean.py b/precise/skaters/locationutil/hubermean.py
--- a/precise/skaters/locationutil/hubermean.py
+++ b/precise/skaters/locationutil/hubermean.py
     """ Compute a columnwise pseudo-mean of xs, by minimizing a generalized Huber error that is
-        proportional to x^2 near zero and asymptotes to |x| as |x|->infinity.
-             f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
-        This is the same as the function below, except the parameter a will multiply std(x)
+        proportional to y^2 near zero and asymptotes to |y| as |y|->infinity.
+             f(y) = 1/a log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
+        This is the same as the function below, except the parameter a will multiply std(y)
     :param xs:  (n_samples, n_vars)
     :param a:   Generalized Huber parameter as per formula
     :param b:   Generalized Huber parameter as per formula above, scalar or (nvars,)
@@ -36,11 +36,11 @@ def huber_mean_absolute_params(xs:[[float]], a, b, n_iter=20, atol=1e-8, with_fr
         Thus the result mu might be compared to np.mean(xs, axis=0)
 
         The function being minimized w.r.t mu is
-             f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
-        and for |x|->0 this has asymptote:
-             f(x) -> log(2+b)/a + a/(2+b) * (x-mu)^2
-        whereas for |x|->infinity
-             f(x) -> |x-mu|
+             f(y) = 1/a log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
+        and for |y|->0 this has asymptote:
+             f(y) -> log(2+b)/a + a/(2+b) * (y-mu)^2
+        whereas for |y|->infinity
+             f(y) -> |y-mu|
 
         This Huber function is not the standard Huber loss https://en.wikipedia.org/wiki/Huber_loss
         Rather, it is based on https://arxiv.org/pdf/2108.12627.pdf
@@ -64,7 +64,7 @@ def huber_mean_absolute_params(xs:[[float]], a, b, n_iter=20, atol=1e-8, with_fr
 
 def huber_deriv(mu, a, b, xs):
     """ Derivative of generalized Huber loss w.r.t. mu
-            f'(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
+            f'(y) = 1/a log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
 
     :param mu:   (n_samples,)        # Vector of location parameters
     :param xs :  (n_samples,n_vars)  # Data
@@ -83,8 +83,8 @@ def huber_deriv(mu, a, b, xs):
 
 
 def huber_abs_error(mu, a, b, xs):
-    """ Generalized Huber loss which is "like" abs error, as it approaches |x-mu| as |x-mu|-> infinity
-            f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
+    """ Generalized Huber loss which is "like" abs error, as it approaches |y-mu| as |y-mu|-> infinity
+            f(y) = 1/a log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
     """
     n_samples, n_vars = np.shape(xs)
     mu_rep = np.tile(np.atleast_2d(mu), (n_samples, 1))
@@ -104,14 +104,14 @@ def mean_huber_linear_error(mu, a, b, xs):
 
 def huber_squared_error(mu, a, b, xs):
     """ Rescaled generalized Huber loss which is "like" squared error, in
-        the sense that it approaches (x-mu)^2 as |x-mu|-> 0
+        the sense that it approaches (y-mu)^2 as |y-mu|-> 0
 
         If
-             f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
+             f(y) = 1/a log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
         Then
-             f(x) -> log(2+b)/a + a/(2+b) * x^2    as x->0
+             f(y) -> log(2+b)/a + a/(2+b) * y^2    as y->0
         by Taylor. So we define
-             g(x) := ( f(x) - log(2+b)/a ) * (2+b)/a
+             g(y) := ( f(y) - log(2+b)/a ) * (2+b)/a
     """
 
     f = huber_abs_error(mu=mu,a=a,b=b,xs=xs)
diff --git a/precise/skaters/locationutil/vectorfunctions.py b/precise/skaters/locationutil/vectorfunctions.py
index dcc8009f..f1b4d050 100644
--- a/precise/skaters/locationutil/vectorfunctions.py
+++ b/precise/skaters/locationutil/vectorfunctions.py
@@ -18,7 +18,7 @@ def normalize(x):
 
 def scatter(x):
     """
-        matrix x x^t
+        matrix x x^t
     """
     x1 = np.atleast_2d(x)
     xt = np.transpose(x1)
diff --git a/precise/skaters/managerutil/ratcheting.py b/precise/skaters/managerutil/ratcheting.py
index 7a7daafd..4cd2b44f 100644
--- a/precise/skaters/managerutil/ratcheting.py
+++ b/precise/skaters/managerutil/ratcheting.py
@@ -10,7 +10,7 @@ def ratchet_portfolios(ys, w, w_lower, w_upper, min_dw=1e-6)->([float], dict):
 
         Process a month of data (say) with ratcheting trading to upper and lower envelope
 
-    :param ys:        num_days x num_assets log returns
+    :param ys:        num_days x num_assets log returns
    :param w:         portfolio at start of period
    :param w_lower:
    :param w_upper:
diff --git a/tests/covariance/test_ema.py b/tests/covariance/test_ema.py
index 76ec3fc2..42287a70 100644
--- a/tests/covariance/test_ema.py
+++ b/tests/covariance/test_ema.py
@@ -16,20 +16,20 @@ def test_same_burn_in():
     sm = {}
     for x in data:
-        sm = ema_scov(s=sm, x=x)
+        sm = ema_scov(s=sm, y=x)
     assert np.isclose(se['pcov'], sm['pcov']).all()
 
 
 def test_diag():
-    data = create_correlated_dataset(19, (2.2, 4.4, 1.5), np.array([[0.2, 0.5, 0.7], [0.3, 0.2, 0.2], [0.5, 0.3, 0.1]]),
+    data = create_correlated_dataset(100, (2.2, 4.4, 1.5), np.array([[0.2, 0.5, 0.7], [0.3, 0.2, 0.2], [0.5, 0.3, 0.1]]),
                                      (1, 5, 3))
     rho = 0.05
     r0 = {}
     r1 = {}
     n_emp = 10
     for k,x in enumerate(data):
-        r1 = ema_scov(s=r1, x=x,r=rho, n_emp=n_emp)
+        r1 = ema_scov(s=r1, y=x, r=rho, n_emp=n_emp)
         r0 = rvar(m=r0,x=x[0],rho=rho, n=n_emp)
         if k >= 2:
             c = r1['scov'][0, 0]
diff --git a/tests/scalar/test_huber_asym.py b/tests/scalar/test_huber_asym.py
index ad8b2023..dbc7495f 100644
--- a/tests/scalar/test_huber_asym.py
+++ b/tests/scalar/test_huber_asym.py
@@ -7,8 +7,8 @@ def huber_ratio(x):
     b = 1.0
     f = math.log(math.exp(a * x) + math.exp(-(a * x)) + b) / a
     c = math.log(2 + b) / a
-    d = a / (2 + b)     # f-c -> d x**2
-    g = (f - c) / d     # g -> x**2
+    d = a / (2 + b)     # f-c -> d x**2
+    g = (f - c) / d     # g -> x**2
     h = x ** 2
     r = g / h
     return r
diff --git a/tests/skatertools/dictionaries/debug_skatercovariance.py b/tests/skatertools/dictionaries/debug_skatercovariance.py
index 579d34cf..9f7c8a92 100644
--- a/tests/skatertools/dictionaries/debug_skatercovariance.py
+++ b/tests/skatertools/dictionaries/debug_skatercovariance.py
@@ -60,12 +60,12 @@ def f(y, s, k=1):
 ]
 
 # Update with data points
 for x in data_stream:
-    print({'x':x})
+    print({'y':x})
     skater_cov.update(x=x, f=f)
 
 # Second lap
 for x in data_stream:
-    print({'x': x})
+    print({'y': x})
     skater_cov.update(x=x,f=f)
 
 # Retrieve correlation matrix
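For reference, a small numeric check of the asymptotics described in the hubermean.py
docstrings and exercised by tests/scalar/test_huber_asym.py; the parameter values and
sample points below are chosen purely for illustration:

    # Sketch: generalized Huber loss, quadratic near zero and absolute for large values
    import math

    a, b = 1.0, 1.0
    for x in (0.01, 0.1, 10.0, 100.0):
        f = math.log(math.exp(a * x) + math.exp(-(a * x)) + b) / a  # f(x) = 1/a log(e^(ax) + e^(-ax) + b)
        c = math.log(2 + b) / a                                     # value of f at x = 0
        g = (f - c) * (2 + b) / a                                   # rescaled loss, ~ x**2 near zero
        print(x, g / x ** 2, f / abs(x))                            # g/x**2 -> 1 as x -> 0, f/|x| -> 1 as |x| -> infinity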