Skip to content

Commit

Permalink
Implementation of query buffering, job partitioning, and progress reporting.
Browse files Browse the repository at this point in the history
  • Loading branch information
dbukki committed Apr 18, 2024
1 parent 5285056 commit 3e2b26e
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,32 @@ odb::query<TQueryParam> getFilterPathsQuery(
return query;
}

/// @brief Non-owning view over one batch of tasks held in a
/// std::vector<TTask>. Only a pair of iterators and an element count
/// are stored; the vector itself must be kept alive by the caller.
/// The begin()/end() members make the view usable in range-based for loops.
/// @tparam TTask The element type of the underlying task vector.
template<typename TTask>
class MetricsTasks
{
public:
  /// Read-only iterator type of the underlying task vector.
  using TTaskIter = typename std::vector<TTask>::const_iterator;

  /// @param begin_ Iterator to the first task of the batch.
  /// @param end_ Iterator one past the last task of the batch.
  /// @param size_ Number of tasks in the batch, as reported by the caller
  /// (stored verbatim, not recomputed from the iterators).
  MetricsTasks(
    const TTaskIter& begin_,
    const TTaskIter& end_,
    std::size_t size_)
    : _begin(begin_), _end(end_), _size(size_)
  {
  }

  /// @return Iterator to the first task of the batch.
  const TTaskIter& begin() const { return _begin; }

  /// @return Iterator one past the last task of the batch.
  const TTaskIter& end() const { return _end; }

  /// @return The task count given at construction.
  std::size_t size() const { return _size; }

private:
  TTaskIter _begin;
  TTaskIter _end;
  std::size_t _size;
};


class CppMetricsParser : public AbstractParser
{
Expand Down Expand Up @@ -88,34 +114,95 @@ class CppMetricsParser : public AbstractParser
/// specified worker function on parallel threads.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of parameters to query.
/// @param name_ The name of the metric (for progress logging).
/// @param partitions_ The number of jobs to partition the query into.
/// @param query_ A filter query for retrieving only
/// the eligible parameters for which a worker should be spawned.
/// @param worker_ The logic of the worker thread.
template<typename TQueryParam>
void parallelCalcMetric(
  const char* name_,
  std::size_t partitions_,
  const odb::query<TQueryParam>& query_,
  const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
  // Overview: the query results are first buffered in memory, then split
  // into at most `partitions_` consecutive batches; each batch is enqueued
  // as one job and handed to `worker_` as a MetricsTasks view.
  typedef MetricsTasks<TQueryParam> TMetricsTasks;
  typedef typename TMetricsTasks::TTaskIter TTaskIter;
  // A job is a (1-based job index, batch of tasks) pair; the index is only
  // used for progress logging.
  typedef std::pair<std::size_t, TMetricsTasks> TJobParam;

  // Define the thread pool and job wrapper function.
  // The wrapper captures `partitions_` by reference, so the progress message
  // shows the clamped job count computed further below (no job is enqueued
  // before the clamping happens).
  LOG(info) << name_ << " : Collecting jobs from database...";
  std::unique_ptr<util::JobQueueThreadPool<TJobParam>> pool =
    util::make_thread_pool<TJobParam>(_threadCount,
    [&](const TJobParam& job)
    {
      LOG(info) << '(' << job.first << '/' << partitions_
        << ") " << name_;
      worker_(job.second);
    });

  // Cache the results of the query that will be dispatched to workers.
  // The jobs only hold iterators into this vector, so it must stay alive
  // (and unmodified) until pool->wait() below has returned.
  std::vector<TQueryParam> tasks;
  util::OdbTransaction {_ctx.db} ([&, this]
  {
    // Storing the result directly and then calling odb::result<>::cache()
    // on it does not work: odb::result<>::size() will always throw
    // odb::result_not_cached. As of writing, this is a limitation of SQLite.
    // So we fall back to the old-fashioned way: std::vector<> in memory.
    for (const TQueryParam& param : _ctx.db->query<TQueryParam>(query_))
      tasks.emplace_back(param);
  });

  // Ensure that all workers receive at least one task.
  // (With zero tasks this yields zero partitions, and no job is dispatched.)
  std::size_t taskCount = tasks.size();
  if (partitions_ > taskCount)
    partitions_ = taskCount;

  // Dispatch jobs to workers in discrete packets.
  LOG(info) << name_ << " : Dispatching jobs on "
    << _threadCount << " thread(s)...";
  std::size_t prev = 0;
  TTaskIter it_prev = tasks.cbegin();

  std::size_t i = 0;
  while (i < partitions_)
  {
    // `next` is the cumulative end index of the i-th job (i is 1-based after
    // the increment). The last job ends exactly at taskCount, and batch sizes
    // differ by at most one.
    std::size_t next = taskCount * ++i / partitions_;
    std::size_t size = next - prev;
    TTaskIter it_next = it_prev;
    std::advance(it_next, size);

    pool->enqueue(TJobParam(i, TMetricsTasks(it_prev, it_next, size)));

    prev = next;
    it_prev = it_next;
  }

  // Await the termination of all workers.
  pool->wait();
  LOG(info) << name_ << " : Calculation finished.";
}

/// @brief Calculates a metric by querying all objects of the
/// specified parameter type and passing them one-by-one to the
/// specified worker function on parallel threads.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of parameters to query.
/// @param name_ The name of the metric (for progress logging).
/// @param partitions_ The number of jobs to partition the query into.
/// @param worker_ The logic of the worker thread.
template<typename TQueryParam>
void parallelCalcMetric(
const std::function<void(const TQueryParam&)>& worker_)
{ parallelCalcMetric<TQueryParam>(odb::query<TQueryParam>(), worker_); }
const char* name_,
std::size_t partitions_,
const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
parallelCalcMetric<TQueryParam>(
name_,
partitions_,
odb::query<TQueryParam>(),
worker_);
}


int _threadCount;
Expand Down
203 changes: 109 additions & 94 deletions plugins/cpp_metrics/parser/src/cppmetricsparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,33 +103,43 @@ bool CppMetricsParser::cleanupDatabase()
void CppMetricsParser::functionParameters()
{
parallelCalcMetric<model::CppFunctionParamCountWithId>(
"Function parameters",
_threadCount * 5,// number of jobs; adjust for granularity
getFilterPathsQuery<model::CppFunctionParamCountWithId>(),
[&, this](const model::CppFunctionParamCountWithId& funParams)
[&, this](const MetricsTasks<model::CppFunctionParamCountWithId>& tasks)
{
util::OdbTransaction {_ctx.db} ([&, this]
{
model::CppAstNodeMetrics funcParams;
funcParams.astNodeId = funParams.id;
funcParams.type = model::CppAstNodeMetrics::Type::PARAMETER_COUNT;
funcParams.value = funParams.count;
_ctx.db->persist(funcParams);
for (const model::CppFunctionParamCountWithId& param : tasks)
{
model::CppAstNodeMetrics funcParams;
funcParams.astNodeId = param.id;
funcParams.type = model::CppAstNodeMetrics::Type::PARAMETER_COUNT;
funcParams.value = param.count;
_ctx.db->persist(funcParams);
}
});
});
}

void CppMetricsParser::functionMcCabe()
{
parallelCalcMetric<model::CppFunctionMcCabe>(
"Function-level McCabe",
_threadCount * 5,// number of jobs; adjust for granularity
getFilterPathsQuery<model::CppFunctionMcCabe>(),
[&, this](const model::CppFunctionMcCabe& function)
[&, this](const MetricsTasks<model::CppFunctionMcCabe>& tasks)
{
util::OdbTransaction {_ctx.db} ([&, this]
{
model::CppAstNodeMetrics funcMcCabe;
funcMcCabe.astNodeId = function.astNodeId;
funcMcCabe.type = model::CppAstNodeMetrics::Type::MCCABE;
funcMcCabe.value = function.mccabe;
_ctx.db->persist(funcMcCabe);
for (const model::CppFunctionMcCabe& param : tasks)
{
model::CppAstNodeMetrics funcMcCabe;
funcMcCabe.astNodeId = param.astNodeId;
funcMcCabe.type = model::CppAstNodeMetrics::Type::MCCABE;
funcMcCabe.value = param.mccabe;
_ctx.db->persist(funcMcCabe);
}
});
});
}
Expand All @@ -138,100 +148,105 @@ void CppMetricsParser::lackOfCohesion()
{
// Calculate the cohesion metric for all types on parallel threads.
parallelCalcMetric<model::CohesionCppRecordView>(
"Lack of cohesion",
_threadCount * 25,// number of jobs; adjust for granularity
getFilterPathsQuery<model::CohesionCppRecordView>(),
[&, this](const model::CohesionCppRecordView& type)
[&, this](const MetricsTasks<model::CohesionCppRecordView>& tasks)
{
// Simplify some type names for readability.
typedef std::uint64_t HashType;

typedef odb::query<model::CohesionCppFieldView>::query_columns QField;
const auto& QFieldTypeHash = QField::CppMemberType::typeHash;

typedef odb::query<model::CohesionCppMethodView>::query_columns QMethod;
const auto& QMethodTypeHash = QMethod::CppMemberType::typeHash;

typedef odb::query<model::CohesionCppAstNodeView>::query_columns QNode;
const auto& QNodeFilePath = QNode::File::path;
const auto& QNodeRange = QNode::CppAstNode::location.range;

util::OdbTransaction {_ctx.db} ([&, this]
{
std::unordered_set<HashType> fieldHashes;
// Query all fields of the current type.
for (const model::CohesionCppFieldView& field
: _ctx.db->query<model::CohesionCppFieldView>(
QFieldTypeHash == type.entityHash
))
{
// Record these fields for later use.
fieldHashes.insert(field.entityHash);
}
std::size_t fieldCount = fieldHashes.size();

std::size_t methodCount = 0;
std::size_t totalCohesion = 0;
// Query all methods of the current type.
for (const model::CohesionCppMethodView& method
: _ctx.db->query<model::CohesionCppMethodView>(
QMethodTypeHash == type.entityHash
))
// Simplify some type names for readability.
typedef std::uint64_t HashType;

typedef odb::query<model::CohesionCppFieldView>::query_columns QField;
const auto& QFieldTypeHash = QField::CppMemberType::typeHash;

typedef odb::query<model::CohesionCppMethodView>::query_columns QMethod;
const auto& QMethodTypeHash = QMethod::CppMemberType::typeHash;

typedef odb::query<model::CohesionCppAstNodeView>::query_columns QNode;
const auto& QNodeFilePath = QNode::File::path;
const auto& QNodeRange = QNode::CppAstNode::location.range;

for (const model::CohesionCppRecordView& type : tasks)
{
// Do not consider methods with no explicit bodies.
const model::Position start(method.startLine, method.startColumn);
const model::Position end(method.endLine, method.endColumn);
if (start < end)
std::unordered_set<HashType> fieldHashes;
// Query all fields of the current type.
for (const model::CohesionCppFieldView& field
: _ctx.db->query<model::CohesionCppFieldView>(
QFieldTypeHash == type.entityHash
))
{
std::unordered_set<HashType> usedFields;

// Query all AST nodes that use a variable for reading or writing...
for (const model::CohesionCppAstNodeView& node
: _ctx.db->query<model::CohesionCppAstNodeView>(
// ... in the same file as the current method
(QNodeFilePath == method.filePath &&
// ... within the textual scope of the current method's body.
(QNodeRange.start.line >= start.line
|| (QNodeRange.start.line == start.line
&& QNodeRange.start.column >= start.column)) &&
(QNodeRange.end.line <= end.line
|| (QNodeRange.end.line == end.line
&& QNodeRange.end.column <= end.column)))
))
// Record these fields for later use.
fieldHashes.insert(field.entityHash);
}
std::size_t fieldCount = fieldHashes.size();

std::size_t methodCount = 0;
std::size_t totalCohesion = 0;
// Query all methods of the current type.
for (const model::CohesionCppMethodView& method
: _ctx.db->query<model::CohesionCppMethodView>(
QMethodTypeHash == type.entityHash
))
{
// Do not consider methods with no explicit bodies.
const model::Position start(method.startLine, method.startColumn);
const model::Position end(method.endLine, method.endColumn);
if (start < end)
{
// If this AST node is a reference to a field of the type...
if (fieldHashes.find(node.entityHash) != fieldHashes.end())
std::unordered_set<HashType> usedFields;

// Query AST nodes that use a variable for reading or writing...
for (const model::CohesionCppAstNodeView& node
: _ctx.db->query<model::CohesionCppAstNodeView>(
// ... in the same file as the current method
(QNodeFilePath == method.filePath &&
// ... within the textual scope of the current method's body.
(QNodeRange.start.line >= start.line
|| (QNodeRange.start.line == start.line
&& QNodeRange.start.column >= start.column)) &&
(QNodeRange.end.line <= end.line
|| (QNodeRange.end.line == end.line
&& QNodeRange.end.column <= end.column)))
))
{
// ... then mark it as used by this method.
usedFields.insert(node.entityHash);
// If this AST node is a reference to a field of the type...
if (fieldHashes.find(node.entityHash) != fieldHashes.end())
{
// ... then mark it as used by this method.
usedFields.insert(node.entityHash);
}
}

++methodCount;
totalCohesion += usedFields.size();
}

++methodCount;
totalCohesion += usedFields.size();
}
}

// Calculate and record metrics.
const double dF = fieldCount;
const double dM = methodCount;
const double dC = totalCohesion;
const bool trivial = fieldCount == 0 || methodCount == 0;
const bool singular = methodCount == 1;

// Standard lack of cohesion (range: [0,1])
model::CppAstNodeMetrics lcm;
lcm.astNodeId = type.astNodeId;
lcm.type = model::CppAstNodeMetrics::Type::LACK_OF_COHESION;
lcm.value = trivial ? 0.0 :
(1.0 - dC / (dM * dF));
_ctx.db->persist(lcm);

// Henderson-Sellers variant (range: [0,2])
model::CppAstNodeMetrics lcm_hs;
lcm_hs.astNodeId = type.astNodeId;
lcm_hs.type = model::CppAstNodeMetrics::Type::LACK_OF_COHESION_HS;
lcm_hs.value = trivial ? 0.0 : singular ? NAN :
((dM - dC / dF) / (dM - 1.0));
_ctx.db->persist(lcm_hs);
// Calculate and record metrics.
const double dF = fieldCount;
const double dM = methodCount;
const double dC = totalCohesion;
const bool trivial = fieldCount == 0 || methodCount == 0;
const bool singular = methodCount == 1;

// Standard lack of cohesion (range: [0,1])
model::CppAstNodeMetrics lcm;
lcm.astNodeId = type.astNodeId;
lcm.type = model::CppAstNodeMetrics::Type::LACK_OF_COHESION;
lcm.value = trivial ? 0.0 :
(1.0 - dC / (dM * dF));
_ctx.db->persist(lcm);

// Henderson-Sellers variant (range: [0,2])
model::CppAstNodeMetrics lcm_hs;
lcm_hs.astNodeId = type.astNodeId;
lcm_hs.type = model::CppAstNodeMetrics::Type::LACK_OF_COHESION_HS;
lcm_hs.value = trivial ? 0.0 : singular ? NAN :
((dM - dC / dF) / (dM - 1.0));
_ctx.db->persist(lcm_hs);
}
});
});
}
Expand Down

0 comments on commit 3e2b26e

Please sign in to comment.