From 764fa8797b962aa68f036a3a17cd85550d395543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marius=20Miku=C4=8Dionis?= Date: Wed, 1 Nov 2023 10:20:47 +0100 Subject: [PATCH 1/2] Revert "mimicking old Mlearning setup" This reverts commit 949204932e25eea91269717d1a471ba6f621368a. --- src/MLearning.cpp | 54 +++++++++++++++++------------------------------ src/structs.h | 11 ++++------ 2 files changed, 23 insertions(+), 42 deletions(-) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index 2173dc7..7b7aabe 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -242,11 +242,13 @@ namespace prlearn { avg_t mean, old_mean; std::vector sample_qvar; std::vector old_var; + avg_t svar, ovar; + double fut = 0; for (auto& s : _samples) { auto best = minimize ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - double var = 0; + double squared = 0; if (s._size == 0 || s._cloud == 0 || discount == 0) { best = 0; } else { @@ -255,10 +257,10 @@ namespace prlearn { auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg(); fut = std::min(fut, c); if (c == best) - var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q.variance()); + squared = std::min(squared, clouds[s._cloud]._nodes[s._nodes[i]]._q.squared()); else if ((c < best && minimize) || (c > best && !minimize)) { best = c; - var = clouds[s._cloud]._nodes[s._nodes[i]]._q.variance(); + squared = clouds[s._cloud]._nodes[s._nodes[i]]._q.squared(); } } } @@ -269,14 +271,14 @@ namespace prlearn { best *= discount; // dont look too far into the future for the variance. // if we do, it will grow in horrible ways and be useless. - var *= std::min(0.5, discount); + squared *= std::min(0.5, discount); for (size_t d = 0; d < dimen; ++d) { if (s._variance) { auto v = s._variance[d]; v.first.avg() += best; v.second.avg() += best; - v.first.set_variance(std::max(v.first.variance(), var)); - v.second.set_variance(std::max(v.second.variance(), var)); + v.first.squared() = std::max(v.first.squared(), squared); + v.second.squared() = std::max(v.second.squared(), squared); tmpq[d].first.addPoints(v.first.cnt(), v.first.avg()); tmpq[d].second.addPoints(v.second.cnt(), v.second.avg()); mean.addPoints(v.first.cnt(), v.first.avg()); @@ -288,8 +290,8 @@ namespace prlearn { auto v = s._old[d]; v.first.avg() += best; v.second.avg() += best; - v.first.set_variance(std::max(v.first.variance(), var)); - v.second.set_variance(std::max(v.second.variance(), var)); + v.first.squared() = std::max(v.first.squared(), squared); + v.second.squared() = std::max(v.second.squared(), squared); old_mean.addPoints(v.first.cnt(), v.first.avg()); old_mean.addPoints(v.second.cnt(), v.second.avg()); old_var.push_back(v.first); @@ -298,50 +300,32 @@ namespace prlearn { } } - avg_t svar, ovar; + auto vars = std::make_unique < avg_t[]>(dimen * 2); bool first = true; size_t dimcnt = 0; for (auto& s : sample_qvar) { - { - const auto dif = std::abs(s.avg() - mean._avg); - const auto std = std::sqrt(s.variance()); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - svar.addPoints(s.cnt(), var); - } auto id = dimcnt; - auto dmin = tmpq[id].first.avg(); if (!first) { - dmin = tmpq[dimcnt].second.avg(); id = dimen + dimcnt; } - { - const auto dif = std::abs(s.avg() - dmin); - const auto std = std::sqrt(s.variance()); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - vars[id].addPoints(s.cnt(), var); - } + vars[id].addPoints(s.cnt(), s.squared()); if (!first) dimcnt = (dimcnt + 1) % dimen; first = !first; + svar.addPoints(s.cnt(), s.squared()); } - for (auto& s : old_var) { - const auto dif = std::abs(s.avg() - old_mean._avg); - const auto std = std::sqrt(s.variance()); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - ovar.addPoints(s.cnt(), var); - } + for (auto& s : old_var) + ovar.addPoints(s.cnt(), s.squared()); for (size_t i = 0; i < dimen; ++i) { - tmpq[i].first.set_variance(vars[i]._avg); - tmpq[i].second.set_variance(vars[i + dimen]._avg); + tmpq[i].first.squared() = vars[i]._avg; + tmpq[i].second.squared() = vars[i + dimen]._avg; } - qvar_t nq(mean._avg, mean._cnt / (dimen * 2), 0); - nq.set_variance(svar._avg); - qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), 0); - oq.set_variance(ovar._avg); + qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg); + qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), ovar._avg); return std::make_pair(nq, oq); } diff --git a/src/structs.h b/src/structs.h index fbbfe5f..7b9d230 100644 --- a/src/structs.h +++ b/src/structs.h @@ -132,16 +132,13 @@ namespace prlearn { auto pow = std::pow(_avg, 2.0); if(pow >= _sq) return 0; - return _sq - pow; + auto var = std::sqrt(_sq - pow); + return var; } - void set_variance(double var) { - _sq = std::pow(_avg, 2.0) + var; - } - - double& squared() { + double& squared() { return _sq; - } + } const double& squared() const { return _sq; From 8341ca80206ea9e0520c3fb0262c0c95edec41f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marius=20Miku=C4=8Dionis?= Date: Wed, 1 Nov 2023 10:21:56 +0100 Subject: [PATCH 2/2] Revert "switched to correct computation of variance" This reverts commit 1b9e4f1e88ec0e9bdffe127311939942b4fed15e. --- src/MLearning.cpp | 48 ++++++++++++++++++++++++++-------------- src/RefinementTree.cpp | 6 ++--- src/SimpleMLearning.cpp | 4 ++-- src/SimpleRegressor.h | 2 +- src/structs.cpp | 49 ++++++++++++++++++++++++++++++----------- src/structs.h | 35 +++++++---------------------- 6 files changed, 81 insertions(+), 63 deletions(-) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index 7b7aabe..495d254 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -242,13 +242,11 @@ namespace prlearn { avg_t mean, old_mean; std::vector sample_qvar; std::vector old_var; - avg_t svar, ovar; - double fut = 0; for (auto& s : _samples) { auto best = minimize ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - double squared = 0; + double var = 0; if (s._size == 0 || s._cloud == 0 || discount == 0) { best = 0; } else { @@ -257,10 +255,10 @@ namespace prlearn { auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg(); fut = std::min(fut, c); if (c == best) - squared = std::min(squared, clouds[s._cloud]._nodes[s._nodes[i]]._q.squared()); + var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q._variance); else if ((c < best && minimize) || (c > best && !minimize)) { best = c; - squared = clouds[s._cloud]._nodes[s._nodes[i]]._q.squared(); + var = clouds[s._cloud]._nodes[s._nodes[i]]._q._variance; } } } @@ -271,14 +269,14 @@ namespace prlearn { best *= discount; // dont look too far into the future for the variance. // if we do, it will grow in horrible ways and be useless. - squared *= std::min(0.5, discount); + var *= std::min(0.5, discount); for (size_t d = 0; d < dimen; ++d) { if (s._variance) { auto v = s._variance[d]; v.first.avg() += best; v.second.avg() += best; - v.first.squared() = std::max(v.first.squared(), squared); - v.second.squared() = std::max(v.second.squared(), squared); + v.first._variance = std::max(v.first._variance, var); + v.second._variance = std::max(v.second._variance, var); tmpq[d].first.addPoints(v.first.cnt(), v.first.avg()); tmpq[d].second.addPoints(v.second.cnt(), v.second.avg()); mean.addPoints(v.first.cnt(), v.first.avg()); @@ -290,8 +288,8 @@ namespace prlearn { auto v = s._old[d]; v.first.avg() += best; v.second.avg() += best; - v.first.squared() = std::max(v.first.squared(), squared); - v.second.squared() = std::max(v.second.squared(), squared); + v.first._variance = std::max(v.first._variance, var); + v.second._variance = std::max(v.second._variance, var); old_mean.addPoints(v.first.cnt(), v.first.avg()); old_mean.addPoints(v.second.cnt(), v.second.avg()); old_var.push_back(v.first); @@ -300,28 +298,44 @@ namespace prlearn { } } - + avg_t svar, ovar; auto vars = std::make_unique < avg_t[]>(dimen * 2); bool first = true; size_t dimcnt = 0; for (auto& s : sample_qvar) { + { + const auto dif = std::abs(s.avg() - mean._avg); + const auto std = std::sqrt(s._variance); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + svar.addPoints(s.cnt(), var); + } auto id = dimcnt; + auto dmin = tmpq[id].first.avg(); if (!first) { + dmin = tmpq[dimcnt].second.avg(); id = dimen + dimcnt; } - vars[id].addPoints(s.cnt(), s.squared()); + { + const auto dif = std::abs(s.avg() - dmin); + const auto std = std::sqrt(s._variance); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + vars[id].addPoints(s.cnt(), var); + } if (!first) dimcnt = (dimcnt + 1) % dimen; first = !first; - svar.addPoints(s.cnt(), s.squared()); } - for (auto& s : old_var) - ovar.addPoints(s.cnt(), s.squared()); + for (auto& s : old_var) { + const auto dif = std::abs(s.avg() - old_mean._avg); + const auto std = std::sqrt(s._variance); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + ovar.addPoints(s.cnt(), var); + } for (size_t i = 0; i < dimen; ++i) { - tmpq[i].first.squared() = vars[i]._avg; - tmpq[i].second.squared() = vars[i + dimen]._avg; + tmpq[i].first._variance = vars[i]._avg; + tmpq[i].second._variance = vars[i + dimen]._avg; } qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg); diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index 0c8c26a..8162b58 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -69,7 +69,7 @@ namespace prlearn { return qvar_t(std::numeric_limits::quiet_NaN(), 0, 0); auto n = _nodes[res->_nid].get_leaf(point, res->_nid, _nodes); auto& node = _nodes[n]; - return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q.squared()); + return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance); } double RefinementTree::getBestQ(const double* point, bool minimization, size_t* next_labels, size_t n_labels) const { @@ -231,12 +231,12 @@ namespace prlearn { if (nodes[slow]._predictor._q.cnt() == 0) { nodes[slow]._predictor._q.cnt() = 1; nodes[slow]._predictor._q.avg() = oq.avg(); - nodes[slow]._predictor._q.squared() = std::pow(oq.avg(), 2.0); + nodes[slow]._predictor._q._variance = 0; } if (nodes[shigh]._predictor._q.cnt() == 0) { nodes[shigh]._predictor._q.cnt() = 1; nodes[shigh]._predictor._q.avg() = oq.avg(); - nodes[shigh]._predictor._q.squared() = std::pow(oq.avg(), 2.0); + nodes[shigh]._predictor._q._variance = 0; } } nodes[shigh]._predictor._cnt = nodes[shigh]._predictor._q.cnt(); diff --git a/src/SimpleMLearning.cpp b/src/SimpleMLearning.cpp index 65b1da9..6ce3774 100644 --- a/src/SimpleMLearning.cpp +++ b/src/SimpleMLearning.cpp @@ -110,14 +110,14 @@ namespace prlearn { for(auto& s : n._succssors) { const auto dif = std::abs(s._cost.avg() - nq._avg); - const auto std = std::sqrt(s._cost.variance()); + const auto std = std::sqrt(s._cost._variance); auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; nv.addPoints(s._cost.cnt(), var); } n._q = qvar_t(nq._avg, nq._cnt, nv._avg); if ((minimization && n._q.avg() <= rq.avg()) || (!minimization && n._q.avg() >= rq.avg())) { - if(n._q.avg() != rq.avg() || n._q.variance() < rq.variance() || n._q.cnt() > rq.cnt()) + if(n._q.avg() != rq.avg() || n._q._variance < rq._variance || n._q.cnt() > rq.cnt()) rq = n._q; } } diff --git a/src/SimpleRegressor.h b/src/SimpleRegressor.h index 25f2154..239dbc2 100644 --- a/src/SimpleRegressor.h +++ b/src/SimpleRegressor.h @@ -47,7 +47,7 @@ namespace prlearn { auto res = std::lower_bound(std::begin(_labels), std::end(_labels), lf); if (res != std::end(_labels) && res->_label == label) - return qvar_t{res->_value.avg(), (double)res->_cnt, res->_value.squared()}; + return qvar_t{res->_value.avg(), (double)res->_cnt, res->_value._variance}; else return qvar_t{std::numeric_limits::quiet_NaN(), 0, 0}; } diff --git a/src/structs.cpp b/src/structs.cpp index dc1d056..b4c3692 100644 --- a/src/structs.cpp +++ b/src/structs.cpp @@ -1,21 +1,21 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: structs.cpp * Author: Peter G. Jensen * @@ -44,7 +44,7 @@ namespace prlearn { void qvar_t::print(std::ostream& stream) const { stream << "["; stream << (*(avg_t*)this); - stream << ", " << variance() << "]"; + stream << ", " << _variance << "]"; } std::ostream& operator<<(std::ostream& o, const qvar_t& v) { @@ -59,15 +59,29 @@ namespace prlearn { return a; qvar_t res = a; res.addPoints(b._cnt, b._avg); - res._sq = (a._sq * (a._cnt / res._cnt)) + (b._sq * (b._cnt / res._cnt)); + const auto adif = std::abs(res._avg - a._avg); + const auto bdif = std::abs(res._avg - b._avg); + const auto astd = std::sqrt(a._variance); + const auto bstd = std::sqrt(b._variance); + auto ca = std::pow(adif + astd, 2.0) + std::pow(adif - astd, 2.0); + auto cb = std::pow(bdif + bstd, 2.0) + std::pow(bdif - bstd, 2.0); + avg_t tmp; + tmp.addPoints(a._cnt, ca / 2.0); + tmp.addPoints(b._cnt, cb / 2.0); + res._variance = tmp._avg; return res; } qvar_t& qvar_t::operator+=(double d) { assert(!std::isinf(d)); avg_t::operator+=(d); - auto diff = std::pow(d, 2.0) - _sq; - _sq += diff / _cnt; + auto nvar = std::pow(d - _avg, 2.0); + assert(!std::isinf(nvar)); + if (_cnt == 1) _variance = nvar; + else { + nvar -= _variance; + _variance += nvar / _cnt; + } return *this; } @@ -75,9 +89,18 @@ namespace prlearn { assert(weight >= 0); assert(_cnt >= 0); if (weight == 0) return; + auto oa = _avg; avg_t::addPoints(weight, d); - auto diff = std::pow(d, 2.0) - _sq; - _sq += diff * (weight / _cnt); + auto nvar = std::abs((d - oa)*(d - _avg)); + assert(!std::isinf(nvar)); + if (_cnt == weight) _variance = nvar; + else { + nvar -= _variance; + _variance += (nvar * weight) / _cnt; + } + assert(_variance >= 0); + assert(!std::isnan(_variance)); + assert(!std::isinf(_variance)); } double triangular_cdf(double mid, double width, double point) { @@ -94,10 +117,10 @@ namespace prlearn { constexpr double minvar = 0.0001; if (std::min(a.cnt(), b.cnt()) <= 1) return; - if (a.variance() == b.variance() && a.avg() == b.avg()) + if (a._variance == b._variance && a.avg() == b.avg()) return; - auto vara = std::max(minvar, a.variance()); - auto varb = std::max(minvar, b.variance()); + auto vara = std::max(minvar, a._variance); + auto varb = std::max(minvar, b._variance); double tval = std::abs(a.avg() - b.avg()) / std::sqrt(((vara * a.cnt()) + (varb * b.cnt())) / (a.cnt() * b.cnt())); diff --git a/src/structs.h b/src/structs.h index 7b9d230..6ca44fa 100644 --- a/src/structs.h +++ b/src/structs.h @@ -1,21 +1,21 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: structs.h * Author: Peter G. Jensen * @@ -33,8 +33,6 @@ #include #include #include -#include - namespace prlearn { struct avg_t { @@ -56,7 +54,7 @@ namespace prlearn { } else { _cnt += weight; double diff = d - _avg; - _avg += diff * (weight / _cnt); // add only "share" of difference + _avg += ((diff * weight) / (double) _cnt); // add only "share" of difference } assert(!std::isnan(_avg)); } @@ -98,14 +96,15 @@ namespace prlearn { qvar_t() = default; - qvar_t(double d, double w, double squared) { + qvar_t(double d, double w, double v) { _avg = d; _cnt = w; - _sq = squared; + _variance = v; }; // this is a dirty hijack! qvar_t& operator+=(double d); void addPoints(double weight, double d); + double _variance = 0; auto& avg() { return _avg; @@ -128,24 +127,6 @@ namespace prlearn { } void print(std::ostream& stream) const; static qvar_t approximate(const qvar_t& a, const qvar_t& b); - double variance() const { - auto pow = std::pow(_avg, 2.0); - if(pow >= _sq) - return 0; - auto var = std::sqrt(_sq - pow); - return var; - } - - double& squared() { - return _sq; - } - - const double& squared() const { - return _sq; - } - - private: - double _sq = 0; }; struct splitfilter_t {