Commit 9b73eaa

microprediction committed Dec 3, 2024
1 parent 4f8391f commit 9b73eaa
Showing 14 changed files with 51 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
@@ -14,7 +14,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
- python-version: '3.x'
+ python-version: '3.y'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -60,12 +60,12 @@ def f(y, s, k=1):
]
# Update with data points
for x in data_stream:
- print({'x':x})
+ print({'y':x})
skater_cov.update(x=x, f=f)

# Second lap
for x in data_stream:
- print({'x': x})
+ print({'y': x})
skater_cov.update(x=x,f=f)

# Retrieve correlation matrix
25 changes: 13 additions & 12 deletions precise/skaters/covariance/ewaempfactory.py
@@ -3,28 +3,28 @@
import math
from typing import Union, List

- # Exponential weighted sample covariancecomparisonutil
+ # Exponential weighted sample covariance


def ewa_emp_pcov_factory(y, s:dict, k=1, r=0.025, n_emp=None, e=1):
assert k==1
- s = ema_scov(s=s,x=y,r=r, n_emp=n_emp)
+ s = ema_scov(s=s, y=y, r=r, n_emp=n_emp)
x = s['mean']
x_cov = s['pcov']
return x, x_cov, s


- def ema_scov(s:dict, x:Union[List[float], int]=None, r:float=0.025, n_emp=None):
- """ Maintain running population covariancecomparisonutil """
+ def ema_scov(s:dict, y:Union[List[float], int]=None, r:float=0.025, n_emp=None):
+ """ Maintain running population covariance """
if s.get('n_samples') is None:
- if isinstance(x,int):
- return _ema_scov_init(n_dim=x,r=r, n_emp=n_emp)
- elif isinstance(x,(List,np.ndarray)):
- s = _ema_scov_init(n_dim=len(x),r=r, n_emp=n_emp)
+ if isinstance(y, int):
+ return _ema_scov_init(n_dim=y, r=r, n_emp=n_emp)
+ elif isinstance(y, (List, np.ndarray)):
+ s = _ema_scov_init(n_dim=len(y), r=r, n_emp=n_emp)
else:
- raise ValueError('Not sure how to initialize EWA COV tracker. Supply x=5 say, for 5 dim')
- if x is not None:
- s = _ema_scov_update(s=s, x=x, r=r)
+ raise ValueError('Not sure how to initialize EWA COV tracker. Supply y=5 say, for 5 dim')
+ if y is not None:
+ s = _ema_scov_update(s=s, x=y, r=r)
return s


@@ -50,9 +50,10 @@ def _ema_scov_update(s:dict, x:[float], r:float=None, target=None, y=None):
If target is not None, it will be used in place of the mean when updating
Obviously, this changes the interpretation of 'scov'
- xt - transpose of x
+ xt - transpose of y
"""
x = np.array(x)
if s['n_samples']< s['n_emp']:
# Use the regular cov update for a burn-in period
# During this time both scov and pcov are maintained
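A minimal usage sketch of the renamed ema_scov interface, assuming numpy observations and the defaults shown in the hunk above (illustrative only, not part of the changed files):

import numpy as np
from precise.skaters.covariance.ewaempfactory import ema_scov

# Feed observations through the new `y` keyword; the empty dict is initialized on the first call
s = {}
for _ in range(100):
    obs = np.random.randn(3)                      # one 3-dimensional data point
    s = ema_scov(s=s, y=obs, r=0.025, n_emp=10)

print(s['mean'])                                  # running mean
print(s['pcov'])                                  # exponentially weighted population covariance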
2 changes: 1 addition & 1 deletion precise/skaters/covariance/ewalwfactory.py
@@ -46,7 +46,7 @@ def _lw_ema_scov_update(s, x, r):
"""
# Uses buffered LD up to 2*n_emp observations, then switches to an updating scheme
x = np.asarray(x)
- s['ema_scov'] = ema_scov(s=s['ema_scov'], x=x, r=r)
+ s['ema_scov'] = ema_scov(s=s['ema_scov'], y=x, r=r)
s['buffer'].append(x)
if len(s['buffer'])>s['ema_scov']['n_emp']:
# Update running estimate of the LD shrinkage parameter
2 changes: 1 addition & 1 deletion precise/skaters/covariance/ewalzfactory.py
@@ -91,7 +91,7 @@ def lz_factory(y, s:dict, n_epoch=DEFAULT_N_EPOCH,
:param adj_func: Takes pre matrix and returns binary of same shape (where non-zeros should go)
:param local_pre_func: Function to take local cov matrices and produce precision matrices
:param global_cov_func: Function to take global precision matrix and produce global cov matrix
- :return: x, x_cov, s
+ :return: y, x_cov, s
"""

# Mildly tedious stuff...
8 changes: 4 additions & 4 deletions precise/skaters/covariance/ewapmfactory.py
@@ -7,7 +7,7 @@

# If no target is supplied, either initially or for the update call, then a running mean will be used.

- QUADRANTS = {'cu':(1.0,1,1), # x*1 > 0 y*1 > 0
+ QUADRANTS = {'cu':(1.0,1,1), # y*1 > 0 y*1 > 0
'du':(-1.0,-1,1),
'dl':(-1.0,1,-1),
'cl':(1.0,-1,-1)}
@@ -30,7 +30,7 @@ def partial_ema_scov(s:dict, x:Union[List[float], int]=None, r:float=0.025, targ
elif len(x)>1:
s = _partial_ema_scov_init(n_dim=len(x), r=r, target=target, n_emp=n_emp)
else:
- raise ValueError('Not sure how to initialize EWA COV tracker. Supply x=5 say, for 5 dim')
+ raise ValueError('Not sure how to initialize EWA COV tracker. Supply y=5 say, for 5 dim')
if x is not None:
s = _partial_ema_scov_update(s=s, x=x, r=r)
return s
@@ -71,8 +71,8 @@ def _partial_ema_scov_update(s:dict, x:[float], r:float=None, target=None):
# Update running partial scatter estimates
for q,(w,sgn1,sgn2) in QUADRANTS.items():
# Morally:
- # x1 = max(0, (x-target)*sgn1) * sgn1
- # x2 = (np.max(0, (x-target)*sgn2) * sgn2) if sgn1!=sgn2 else x1
+ # x1 = max(0, (y-target)*sgn1) * sgn1
+ # x2 = (np.max(0, (y-target)*sgn2) * sgn2) if sgn1!=sgn2 else x1
x1 = (x-target)*sgn1
x2 = (x-target)*sgn2
x1[x1<0]=0
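For orientation, a short sketch of the quadrant clipping that the "Morally" comment above describes, using the QUADRANTS dictionary from this file (illustrative only, with a hypothetical two-dimensional observation and a zero target):

import numpy as np
from precise.skaters.covariance.ewapmfactory import QUADRANTS

x = np.array([0.4, -0.2])
target = np.zeros(2)
for q, (w, sgn1, sgn2) in QUADRANTS.items():
    # Keep only the portion of the centered observation pointing into this quadrant
    x1 = np.maximum(0, (x - target) * sgn1) * sgn1
    x2 = np.maximum(0, (x - target) * sgn2) * sgn2 if sgn1 != sgn2 else x1
    print(q, w, x1, x2)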
2 changes: 1 addition & 1 deletion precise/skaters/covarianceutil/conventions.py
@@ -20,7 +20,7 @@ def infer_dimension(n_dim:int=None, x:X_TYPE=None, **ignore)->int:
elif len(x)>1:
return len(x)
else:
- raise ValueError('Ambiguity in number of variables. Try supplying x or n_dim')
+ raise ValueError('Ambiguity in number of variables. Try supplying y or n_dim')


def is_data(x):
8 changes: 4 additions & 4 deletions precise/skaters/covarianceutil/covfunctions.py
@@ -354,9 +354,9 @@ def _schur_complement_direct(A, B, C, D, gamma):


def inverse_multiply(a, b, warn=False, throw=False):
- # Want x = a^{-1} b
- # a x = b
- # x = solve(a,b)
+ # Want y = a^{-1} b
+ # a y = b
+ # y = solve(a,b)
x = np.linalg.solve(a, b)
if (warn or throw):
if np.linalg.matrix_rank(x)<max(np.shape(a)):
@@ -372,7 +372,7 @@ def inverse_multiply(a, b, warn=False, throw=False):


def multiply_by_inverse(a, b, throw=True):
- # Want x = a b^{-1}
+ # Want y = a b^{-1}
# xt = bt^{-1} at = inverse_multiply(bt, at)
# bt xt = at
# xt = solve(bt, at)
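The comments above describe both functions in terms of np.linalg.solve; a quick numerical check of those identities (illustrative only, not part of the commit):

import numpy as np

a = np.array([[2.0, 0.3], [0.1, 1.5]])
b = np.array([[1.0, 2.0], [3.0, 4.0]])

left = np.linalg.solve(a, b)             # a^{-1} b, as computed in inverse_multiply
right = np.linalg.solve(b.T, a.T).T      # a b^{-1}, via the transposed solve in multiply_by_inverse
print(np.allclose(a @ left, b))          # True
print(np.allclose(right @ b, a))         # True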
30 changes: 15 additions & 15 deletions precise/skaters/locationutil/hubermean.py
@@ -7,9 +7,9 @@

def huber_mean(xs:[[float]], a:float=1.0, b=2.0, n_iter=20, atol=1e-8)->[float]:
""" Compute a columnwise pseudo-mean of xs, by minimizing a generalized Huber error that is
- proportional to x^2 near zero and asymptotes to |x| as |x|->infinity.
- f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
- This is the same as the function below, except the parameter a will multiply std(x)
+ proportional to y^2 near zero and asymptotes to |y| as |y|->infinity.
+ f(y) = 1/a log( exp(a*(y-mu)) + exp(-(a*(y-mu)) + b )
+ This is the same as the function below, except the parameter a will multiply std(y)
:param xs: (n_samples, n_vars)
:param a: Generalized Huber parameter as per formula
:param b: Generalized Huber parameter as per formula above, scalar or (nvars,)
@@ -36,11 +36,11 @@ def huber_mean_absolute_params(xs:[[float]], a, b, n_iter=20, atol=1e-8, with_fr
Thus the result mu might be compared to np.mean(xs, axis=0)
The function being minimized w.r.t mu is
- f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
- and for |x|->0 this has asymptote:
- f(x) -> log(2+b)/a + a/(2+b) * (x-mu)^2
- whereas for |x|->infinity
- f(x) -> |x-mu|
+ f(y) = 1/a log( exp(a*(y-mu)) + exp(-(a*(y-mu)) + b )
+ and for |y|->0 this has asymptote:
+ f(y) -> log(2+b)/a + a/(2+b) * (y-mu)^2
+ whereas for |y|->infinity
+ f(y) -> |y-mu|
This Huber function is not the standard Huber loss https://en.wikipedia.org/wiki/Huber_loss
Rather, it is based on https://arxiv.org/pdf/2108.12627.pdf
@@ -64,7 +64,7 @@ def huber_mean_absolute_params(xs:[[float]], a, b, n_iter=20, atol=1e-8, with_fr
def huber_deriv(mu, a, b, xs):
""" Derivative of generalized Huber loss w.r.t. mu
- f'(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
+ f'(y) = 1/a log( exp(a*(y-mu)) + exp(-(a*(y-mu)) + b )
:param mu: (n_samples,) # Vector of location parameters
:param xs : (n_samples,n_vars) # Data
@@ -83,8 +83,8 @@ def huber_deriv(mu, a, b, xs):


def huber_abs_error(mu, a, b, xs):
""" Generalized Huber loss which is "like" abs error, as it approaches |x-mu| as |x-mu|-> infinity
f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
""" Generalized Huber loss which is "like" abs error, as it approaches |y-mu| as |y-mu|-> infinity
f(y) = 1/a log( exp(a*(y-mu)) + exp(-(a*(y-mu)) + b )
"""
n_samples, n_vars = np.shape(xs)
mu_rep = np.tile(np.atleast_2d(mu), (n_samples, 1))
@@ -104,14 +104,14 @@ def mean_huber_linear_error(mu, a, b, xs):

def huber_squared_error(mu, a, b, xs):
""" Rescaled generalized Huber loss which is "like" squared error, in
- the sense that it approaches (x-mu)^2 as |x-mu|-> 0
+ the sense that it approaches (y-mu)^2 as |y-mu|-> 0
If
- f(x) = 1/a log( exp(a*(x-mu)) + exp(-(a*(x-mu)) + b )
+ f(y) = 1/a log( exp(a*(y-mu)) + exp(-(a*(y-mu)) + b )
Then
- f(x) -> log(2+b)/a + a/(2+b) * x^2 as x->0
+ f(y) -> log(2+b)/a + a/(2+b) * y^2 as y->0
by Taylor. So we define
- g(x) := ( f(x) - log(2+b)/a ) * (2+b)/a
+ g(y) := ( f(y) - log(2+b)/a ) * (2+b)/a
"""
f = huber_abs_error(mu=mu,a=a,b=b,xs=xs)
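Reading the docstring formula as f(y) = (1/a) * log(exp(a*(y-mu)) + exp(-a*(y-mu)) + b) (the source text drops one closing parenthesis), the two asymptotes it mentions can be checked numerically; an illustrative sketch, not part of the commit:

import math

def generalized_huber(y, mu=0.0, a=1.0, b=1.0):
    # f(y) = (1/a) * log( exp(a*(y-mu)) + exp(-a*(y-mu)) + b )
    return math.log(math.exp(a * (y - mu)) + math.exp(-a * (y - mu)) + b) / a

# Near mu the loss is quadratic: f(y) ~ log(2+b)/a + a/(2+b) * (y-mu)**2
y_small = 1e-3
quad = math.log(2 + 1.0) + (1.0 / (2 + 1.0)) * y_small ** 2
print(generalized_huber(y_small), quad)   # nearly equal

# Far from mu it grows like the absolute error |y - mu|
print(generalized_huber(50.0), 50.0)      # nearly equal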
2 changes: 1 addition & 1 deletion precise/skaters/locationutil/vectorfunctions.py
@@ -18,7 +18,7 @@ def normalize(x):

def scatter(x):
"""
- matrix x x^t
+ matrix y y^t
"""
x1 = np.atleast_2d(x)
xt = np.transpose(x1)
2 changes: 1 addition & 1 deletion precise/skaters/managerutil/ratcheting.py
@@ -10,7 +10,7 @@ def ratchet_portfolios(ys, w, w_lower, w_upper, min_dw=1e-6)->([float], dict):
Process a month of data (say) with ratcheting trading to upper and lower envelope
- :param ys: num_days x num_assets log returns
+ :param ys: num_days y num_assets log returns
:param w: portfolio at start of period
:param w_lower:
:param w_upper:
6 changes: 3 additions & 3 deletions tests/covariance/test_ema.py
@@ -16,20 +16,20 @@ def test_same_burn_in():

sm = {}
for x in data:
- sm = ema_scov(s=sm, x=x)
+ sm = ema_scov(s=sm, y=x)

assert np.isclose(se['pcov'], sm['pcov']).all()


def test_diag():
- data = create_correlated_dataset(19, (2.2, 4.4, 1.5), np.array([[0.2, 0.5, 0.7], [0.3, 0.2, 0.2], [0.5, 0.3, 0.1]]),
+ data = create_correlated_dataset(100, (2.2, 4.4, 1.5), np.array([[0.2, 0.5, 0.7], [0.3, 0.2, 0.2], [0.5, 0.3, 0.1]]),
(1, 5, 3))
rho = 0.05
r0 = {}
r1 = {}
n_emp = 10
for k,x in enumerate(data):
- r1 = ema_scov(s=r1, x=x,r=rho, n_emp=n_emp)
+ r1 = ema_scov(s=r1, y=x, r=rho, n_emp=n_emp)
r0 = rvar(m=r0,x=x[0],rho=rho, n=n_emp)
if k >= 2:
c = r1['scov'][0, 0]
4 changes: 2 additions & 2 deletions tests/scalar/test_huber_asym.py
@@ -7,8 +7,8 @@ def huber_ratio(x):
b = 1.0
f = math.log(math.exp(a * x) + math.exp(-(a * x)) + b) / a
c = math.log(2 + b) / a
- d = a / (2 + b) # f-c -> d x**2
- g = (f - c) / d # g -> x**2
+ d = a / (2 + b) # f-c -> d y**2
+ g = (f - c) / d # g -> y**2
h = x ** 2
r = g / h
return r
4 changes: 2 additions & 2 deletions tests/skatertools/dictionaries/debug_skatercovariance.py
@@ -60,12 +60,12 @@ def f(y, s, k=1):
]
# Update with data points
for x in data_stream:
- print({'x':x})
+ print({'y':x})
skater_cov.update(x=x, f=f)

# Second lap
for x in data_stream:
- print({'x': x})
+ print({'y': x})
skater_cov.update(x=x,f=f)

# Retrieve correlation matrix
