-
Notifications
You must be signed in to change notification settings - Fork 74
v0.2.53..v0.2.54 changeset ScriptMatchCreator.cpp
Garret Voltz edited this page Mar 31, 2020
·
1 revision
diff --git a/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp b/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
index 2ba58b4..60849a2 100644
--- a/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
+++ b/hoot-js/src/main/cpp/hoot/js/conflate/matching/ScriptMatchCreator.cpp
@@ -90,18 +90,18 @@ public:
_mt(mt),
_script(script),
_filter(filter),
- _customSearchRadius(-1.0)
+ _customSearchRadius(-1.0),
+ _neighborCountMax(-1),
+ _neighborCountSum(0),
+ _elementsEvaluated(0),
+ _maxGroupSize(0),
+ _numElementsVisited(0),
+ _numMatchCandidatesVisited(0),
+ _taskStatusUpdateInterval(ConfigOptions().getTaskStatusUpdateInterval()),
+ _totalElementsToProcess(0)
{
- _neighborCountMax = -1;
- _neighborCountSum = 0;
- _elementsEvaluated = 0;
- _maxGroupSize = 0;
- _numElementsVisited = 0;
- _numMatchCandidatesVisited = 0;
- _taskStatusUpdateInterval = ConfigOptions().getTaskStatusUpdateInterval();
-
// Calls to script functions/var are expensive, both memory-wise and processing-wise. Since this
- // constructor gets called repeatedly by createMatch, keep them out of this constructor.
+ // constructor gets called repeatedly by createMatch, keep those calls out of this constructor.
// Point/Polygon is not meant to conflate any polygons that are conflatable by other conflation
// routines, hence the use of NonConflatableCriterion.
@@ -109,6 +109,8 @@ public:
new ChainCriterion(
ElementCriterionPtr(new PolygonCriterion()),
ElementCriterionPtr(new NonConflatableCriterion(map))));
+
+ _timer.start();
}
~ScriptMatchVisitor()
@@ -147,6 +149,7 @@ public:
}
virtual QString getDescription() const { return ""; }
+ virtual std::string getClassName() const { return ""; }
void checkForMatch(const std::shared_ptr<const Element>& e)
{
@@ -382,7 +385,7 @@ public:
{
if (!_index)
{
- LOG_INFO("Creating script feature index for: " << _scriptPath << "...");
+ LOG_STATUS("Creating script feature index for: " << _scriptPath << "...");
// No tuning was done, I just copied these settings from OsmMapIndex.
// 10 children - 368 - see #3054
@@ -398,6 +401,7 @@ public:
// Point/Polygon conflation behaves diferently than all other generic scripts in that it
// conflates geometries of different types. This class wasn't really originally designed to
// handle that, so we add a logic path here to accommodate Point/Polygon.
+ long numElementsIndexed = 0;
if (!_scriptPath.contains(ScriptMatchCreator::POINT_POLYGON_SCRIPT_NAME))
{
std::function<bool (ConstElementPtr)> f =
@@ -428,6 +432,7 @@ public:
break;
}
v.finalizeIndex();
+ numElementsIndexed = v.getSize();
}
else
{
@@ -440,10 +445,12 @@ public:
getMap()->visitWaysRo(v);
getMap()->visitRelationsRo(v);
v.finalizeIndex();
+ numElementsIndexed = v.getSize();
}
- LOG_VART(_indexToEid.size());
- LOG_DEBUG("Script feature index created for: " << _scriptPath << ".");
+ LOG_STATUS(
+ "Script feature index created for: " << _scriptPath << " with " <<
+ StringUtils::formatLargeNumber(numElementsIndexed) << " elements.");
}
return _index;
}
@@ -577,12 +584,24 @@ public:
" total elements.");
}
}
+
+ // if matching gets slow, throttle the log update interval accordingly.
+ if (_timer.elapsed() > 3000 && _taskStatusUpdateInterval >= 10)
+ {
+ _taskStatusUpdateInterval /= 10;
+ }
+ else if (_timer.elapsed() < 250 && _taskStatusUpdateInterval < 10000)
+ {
+ _taskStatusUpdateInterval *= 10;
+ }
+
_numElementsVisited++;
- if (_numElementsVisited % (_taskStatusUpdateInterval * 100) == 0)
+ if (_numElementsVisited % _taskStatusUpdateInterval == 0)
{
PROGRESS_INFO(
"Processed " << StringUtils::formatLargeNumber(_numElementsVisited) << " / " <<
- StringUtils::formatLargeNumber(getMap()->getElementCount()) << " elements.");
+ StringUtils::formatLargeNumber(_totalElementsToProcess) << " elements.");
+ _timer.restart();
}
}
@@ -596,7 +615,27 @@ public:
void setCandidateDistanceSigma(double sigma) { _candidateDistanceSigma = sigma; }
CreatorDescription getCreatorDescription() const { return _scriptInfo; }
- void setCreatorDescription(const CreatorDescription& description) { _scriptInfo = description; }
+ void setCreatorDescription(const CreatorDescription& description)
+ {
+ _scriptInfo = description;
+
+ switch (_scriptInfo.geometryType)
+ {
+ case GeometryTypeCriterion::GeometryType::Point:
+ _totalElementsToProcess = getMap()->getNodeCount();
+ break;
+ case GeometryTypeCriterion::GeometryType::Line:
+ _totalElementsToProcess = getMap()->getWayCount() + getMap()->getRelationCount();
+ break;
+ case GeometryTypeCriterion::GeometryType::Polygon:
+ _totalElementsToProcess = getMap()->getWayCount() + getMap()->getRelationCount();
+ break;
+ default:
+ // visit all geometry types if the script didn't identify its geometry
+ _totalElementsToProcess = getMap()->size();
+ break;
+ }
+ }
long getNumMatchCandidatesFound() const { return _numMatchCandidatesVisited; }
@@ -607,17 +646,37 @@ private:
// don't hold on to the map.
std::weak_ptr<const OsmMap> _map;
Persistent<Object> _mapJs;
+
vector<ConstMatchPtr>& _result;
- set<ElementId> _empty;
+ ConstMatchThresholdPtr _mt;
+
+ std::shared_ptr<PluginContext> _script;
+
+ ElementCriterionPtr _filter;
+
+ //used for automatic search radius calculation; it is expected that this is set from the
+ //Javascript rules file used for the generic conflation
+ double _customSearchRadius;
+
int _neighborCountMax;
int _neighborCountSum;
- long _elementCount;
int _elementsEvaluated;
size_t _maxGroupSize;
- ConstMatchThresholdPtr _mt;
- std::shared_ptr<PluginContext> _script;
+ long _numElementsVisited;
+ long _numMatchCandidatesVisited;
+
+ int _taskStatusUpdateInterval;
+
+ long _totalElementsToProcess;
+
+ std::shared_ptr<ChainCriterion> _pointPolyCrit;
+
+ QElapsedTimer _timer;
+
+ long _elementCount;
+
CreatorDescription _scriptInfo;
- ElementCriterionPtr _filter;
+
Persistent<Function> _getSearchRadius;
QHash<ElementId, bool> _matchCandidateCache;
@@ -629,16 +688,10 @@ private:
deque<ElementId> _indexToEid;
double _candidateDistanceSigma;
- //used for automatic search radius calculation; it is expected that this is set from the
- //Javascript rules file used for the generic conflation
- double _customSearchRadius;
- QString _scriptPath;
- long _numElementsVisited;
- long _numMatchCandidatesVisited;
- int _taskStatusUpdateInterval;
+ QString _scriptPath;
- std::shared_ptr<ChainCriterion> _pointPolyCrit;
+ set<ElementId> _empty;
};
ScriptMatchCreator::ScriptMatchCreator()
@@ -677,6 +730,7 @@ void ScriptMatchCreator::setArguments(QStringList args)
//bit of a hack...see MatchCreator.h...need to refactor
_description = QString::fromStdString(className()) + "," + args[0];
_cachedScriptVisitor.reset();
+ _scriptInfo = _getScriptDescription(_scriptPath);
LOG_DEBUG(
"Set arguments for: " << className() << " - rules: " << QFileInfo(_scriptPath).fileName());
@@ -756,9 +810,8 @@ void ScriptMatchCreator::createMatches(
ScriptMatchVisitor v(map, matches, threshold, _script, _filter);
v.setScriptPath(_scriptPath);
- const CreatorDescription scriptInfo = _getScriptDescription(_scriptPath);
- _descriptionCache[_scriptPath] = scriptInfo;
- v.setCreatorDescription(scriptInfo);
+ _descriptionCache[_scriptPath] = _scriptInfo;
+ v.setCreatorDescription(_scriptInfo);
v.initSearchRadiusInfo();
v.calculateSearchRadius();
@@ -781,16 +834,15 @@ void ScriptMatchCreator::createMatches(
"within a search radius of " + QString::number(searchRadius, 'g', 2) + " meters";
}
LOG_STATUS(
- "Looking for matches with: " << className() << ";" << scriptFileInfo.fileName() << " " <<
- searchRadiusStr << "...");
+ "Looking for matches with: " << scriptFileInfo.fileName() << " " << searchRadiusStr << "...");
LOG_VARD(*threshold);
const int matchesSizeBefore = matches.size();
_cachedCustomSearchRadii[_scriptPath] = searchRadius;
_candidateDistanceSigmaCache[_scriptPath] = v.getCandidateDistanceSigma();
- LOG_VARD(GeometryTypeCriterion::typeToString(scriptInfo.geometryType));
- switch (scriptInfo.geometryType)
+ LOG_VARD(GeometryTypeCriterion::typeToString(_scriptInfo.geometryType));
+ switch (_scriptInfo.geometryType)
{
case GeometryTypeCriterion::GeometryType::Point:
map->visitNodesRo(v);
@@ -810,7 +862,7 @@ void ScriptMatchCreator::createMatches(
}
const int matchesSizeAfter = matches.size();
- QString matchType = CreatorDescription::baseFeatureTypeToString(scriptInfo.baseFeatureType);
+ QString matchType = CreatorDescription::baseFeatureTypeToString(_scriptInfo.baseFeatureType);
// Workaround for the Point/Polygon script since it doesn't identify a base feature type. See
// note in ScriptMatchVisitor::getIndex and rules/PointPolygon.js.
if (_scriptPath.contains(POINT_POLYGON_SCRIPT_NAME))
@@ -949,6 +1001,24 @@ CreatorDescription ScriptMatchCreator::_getScriptDescription(QString path) const
Handle<Value> value = ToLocal(&plugin)->Get(geometryTypeStr);
result.geometryType = GeometryTypeCriterion::typeFromString(toCpp<QString>(value));
}
+ // This controls which feature types a script conflates and is required. It allows for disabling
+ // superfluous conflate ops. It should probably be integrated with isMatchCandidate somehow at
+ // some point, if possible.
+ Handle<String> matchCandidateCriterionStr =
+ String::NewFromUtf8(current, "matchCandidateCriterion");
+ if (ToLocal(&plugin)->Has(matchCandidateCriterionStr))
+ {
+ Handle<Value> value = ToLocal(&plugin)->Get(matchCandidateCriterionStr);
+ const QString valueStr = toCpp<QString>(value);
+ if (valueStr.contains(";"))
+ {
+ result.matchCandidateCriteria = valueStr.split(";");
+ }
+ else
+ {
+ result.matchCandidateCriteria = QStringList(valueStr);
+ }
+ }
QFileInfo fi(path);
result.className = (QString::fromStdString(className()) + "," + fi.fileName()).toStdString();
@@ -1011,4 +1081,9 @@ QString ScriptMatchCreator::getName() const
return QString::fromStdString(className()) + ";" + scriptFileInfo.fileName();
}
+QStringList ScriptMatchCreator::getCriteria() const
+{
+ return _scriptInfo.matchCandidateCriteria;
+}
+
}