Skip to content

Commit

Permalink
Merge pull request #480 from nsmlzl/pr_zeta_precompute
Browse files (browse the repository at this point in the history)
Optimization of zeta preprocessing
  • Loading branch information
AndreaGuarracino authored Apr 19, 2023
2 parents 8c5541f + dc81233 commit 34f006f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 31 deletions.
24 changes: 8 additions & 16 deletions src/algorithms/path_sgd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,27 +121,19 @@ namespace odgi {
iter_with_max_learning_rate,
eps);

// cache zipf zetas for our full path space (heavy, but one-off)
// cache zipf zetas for our full path space
if (progress) {
std::cerr << "[odgi::path_linear_sgd] calculating zetas for " << (space <= space_max ? space : space_max + (space - space_max) / space_quantization_step + 1) << " zipf distributions" << std::endl;
}

std::vector<double> zetas((space <= space_max ? space : space_max + (space - space_max) / space_quantization_step + 1)+1);
uint64_t last_quantized_i = 0;
#pragma omp parallel for schedule(static,1)
for (uint64_t i = 1; i < space+1; ++i) {
uint64_t quantized_i = i;
uint64_t compressed_space = i;
if (i > space_max){
quantized_i = space_max + (i - space_max) / space_quantization_step + 1;
compressed_space = space_max + ((i - space_max) / space_quantization_step) * space_quantization_step;
double zeta_tmp = 0.0;
for (uint64_t i = 1; i < space + 1; i++) {
zeta_tmp += dirtyzipf::fast_precise_pow(1.0 / i, theta);
if (i <= space_max) {
zetas[i] = zeta_tmp;
}

if (quantized_i != last_quantized_i){
dirtyzipf::dirty_zipfian_int_distribution<uint64_t>::param_type z_p(1, compressed_space, theta);
zetas[quantized_i] = z_p.zeta();

last_quantized_i = quantized_i;
if (i >= space_max && (i - space_max) % space_quantization_step == 0) {
zetas[space_max + 1 + (i - space_max) / space_quantization_step] = zeta_tmp;
}
}

Expand Down
23 changes: 8 additions & 15 deletions src/algorithms/path_sgd_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,16 @@ namespace odgi {
iter_with_max_learning_rate,
eps);

// cache zipf zetas for our full path space (heavy, but one-off)
// cache zipf zetas for our full path space
std::vector<double> zetas((space <= space_max ? space : space_max + (space - space_max) / space_quantization_step + 1)+1);
uint64_t last_quantized_i = 0;
#pragma omp parallel for schedule(static,1)
for (uint64_t i = 1; i < space+1; ++i) {
uint64_t quantized_i = i;
uint64_t compressed_space = i;
if (i > space_max){
quantized_i = space_max + (i - space_max) / space_quantization_step + 1;
compressed_space = space_max + ((i - space_max) / space_quantization_step) * space_quantization_step;
double zeta_tmp = 0.0;
for (uint64_t i = 1; i < space + 1; i++) {
zeta_tmp += dirtyzipf::fast_precise_pow(1.0 / i, theta);
if (i <= space_max) {
zetas[i] = zeta_tmp;
}

if (quantized_i != last_quantized_i){
dirtyzipf::dirty_zipfian_int_distribution<uint64_t>::param_type z_p(1, compressed_space, theta);
zetas[quantized_i] = z_p.zeta();

last_quantized_i = quantized_i;
if (i >= space_max && (i - space_max) % space_quantization_step == 0) {
zetas[space_max + 1 + (i - space_max) / space_quantization_step] = zeta_tmp;
}
}

Expand Down

0 comments on commit 34f006f

Please sign in to comment.