diff --git a/classes/robot/crawler.php b/classes/robot/crawler.php
index a6a34416..9605bd6b 100644
--- a/classes/robot/crawler.php
+++ b/classes/robot/crawler.php
@@ -306,12 +306,20 @@ public function get_queue_size() {
* @return object|boolean The node record if the resource pointed to by the URL can and should be considered; or `false` if the
* URL is invalid or excluded.
*/
- public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOOL_CRAWLER_PRIORITY_DEFAULT) {
+ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOOL_CRAWLER_PRIORITY_DEFAULT,
+ $level = TOOL_CRAWLER_NODE_LEVEL_PARENT) {
global $DB, $CFG;
$url = $this->absolute_url($baseurl, $url);
+ // Strip priority from indirect child nodes. Only parent nodes and their direct
+ // children have priority applied, to avoid recursively applying priority to all
+ // descendants of a parent node.
+ if ($level == TOOL_CRAWLER_NODE_LEVEL_INDIRECT_CHILD) {
+ $priority = TOOL_CRAWLER_PRIORITY_DEFAULT;
+ }
+
// Filter out non http protocols like mailto:cqulibrary@cqu.edu.au etc.
$bits = parse_url($url);
if (array_key_exists('scheme', $bits)
@@ -420,6 +428,7 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
$node->external = self::is_external($url);
$node->needscrawl = time();
$node->priority = $priority;
+ $node->level = $level;
if (isset($courseid)) {
$node->courseid = $courseid;
@@ -438,6 +447,12 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
$node->priority = $priority;
$needsupdating = true;
}
+ if ($node->level != $level) {
+ // This node has been seen again at a different level: record the new level and
+ // flag the record as changed, as the sibling branches do, to avoid reprocessing.
+ $node->level = $level;
+ $needsupdating = true;
+ }
if (isset($courseid)) {
$node->courseid = $courseid;
$needsupdating = true;
@@ -901,8 +915,15 @@ private function link_from_node_to_url($from, $url, $text, $idattr) {
global $DB;
+ // Links found on a parent-level node are direct children; links found on any other node are indirect children.
+ if ($from->level == TOOL_CRAWLER_NODE_LEVEL_PARENT) {
+ $level = TOOL_CRAWLER_NODE_LEVEL_DIRECT_CHILD;
+ } else {
+ $level = TOOL_CRAWLER_NODE_LEVEL_INDIRECT_CHILD;
+ }
+
// Add the node URL to the queue.
- $to = $this->mark_for_crawl($from->url, $url, null, $from->priority);
+ $to = $this->mark_for_crawl($from->url, $url, $from->courseid, $from->priority, $level);
if ($to === false) {
return false;
}
diff --git a/constants.php b/constants.php
index 860819e9..b0649738 100644
--- a/constants.php
+++ b/constants.php
@@ -93,3 +93,12 @@
define('TOOL_CRAWLER_PRIORITY_DEFAULT', 0);
define('TOOL_CRAWLER_PRIORITY_NORMAL', 50);
define('TOOL_CRAWLER_PRIORITY_HIGH', 100);
+
+/**
+ * Node level assigned to each node: PARENT for a parent node (the default level used by
+ * mark_for_crawl), DIRECT_CHILD for a child node discovered within a parent when crawling,
+ * and INDIRECT_CHILD for any child of a child node (or even further removed).
+ */
+define('TOOL_CRAWLER_NODE_LEVEL_PARENT', 2);
+define('TOOL_CRAWLER_NODE_LEVEL_DIRECT_CHILD', 1);
+define('TOOL_CRAWLER_NODE_LEVEL_INDIRECT_CHILD', 0);
diff --git a/db/install.xml b/db/install.xml
index d2510ded..34277e4e 100644
--- a/db/install.xml
+++ b/db/install.xml
@@ -27,6 +27,8 @@
+
+
diff --git a/db/upgrade.php b/db/upgrade.php
index 3138ffa5..01dc8f24 100644
--- a/db/upgrade.php
+++ b/db/upgrade.php
@@ -94,5 +94,21 @@ function xmldb_tool_crawler_upgrade($oldversion) {
upgrade_plugin_savepoint(true, 2019100300, 'tool', 'crawler');
}
+ if ($oldversion < 2020012300) {
+
+ // Define field level to be added to tool_crawler_url, after priority (default 2 = TOOL_CRAWLER_NODE_LEVEL_PARENT).
+ $table = new xmldb_table('tool_crawler_url');
+ $field = new xmldb_field('level', XMLDB_TYPE_INTEGER, '1', null, null, null, '2', 'priority');
+
+ // Add the level field only if the table does not already have it.
+ if (!$dbman->field_exists($table, $field)) {
+ $dbman->add_field($table, $field);
+ }
+
+ // Crawler savepoint reached.
+ upgrade_plugin_savepoint(true, 2020012300, 'tool', 'crawler');
+ }
+
+
return true;
}
diff --git a/version.php b/version.php
index fc112523..0e6dfe3b 100644
--- a/version.php
+++ b/version.php
@@ -27,8 +27,8 @@
defined('MOODLE_INTERNAL') || die();
-$plugin->version = 2020010600; // The current plugin version (Date: YYYYMMDDXX)
-$plugin->release = 2020010600; // The current plugin version (Date: YYYYMMDDXX)
+$plugin->version = 2020012300; // The current plugin version (Date: YYYYMMDDXX)
+$plugin->release = 2020012300; // The current plugin release (Date: YYYYMMDDXX)
$plugin->requires = 2016021800; // Requires this Moodle version.
$plugin->component = 'tool_crawler'; // To check on upgrade, that module sits in correct place.
$plugin->maturity = MATURITY_STABLE;