diff --git a/classes/robot/crawler.php b/classes/robot/crawler.php index 497519b2..a7b329d5 100644 --- a/classes/robot/crawler.php +++ b/classes/robot/crawler.php @@ -916,16 +916,17 @@ private function link_from_node_to_url($from, $url, $text, $idattr) { global $DB; // Ascertain the correct node level based on parent node level. - if ($from->level == TOOL_CRAWLER_NODE_LEVEL_PARENT) { + if (!empty($from->level) && $from->level == TOOL_CRAWLER_NODE_LEVEL_PARENT) { $level = TOOL_CRAWLER_NODE_LEVEL_DIRECT_CHILD; } else { $level = TOOL_CRAWLER_NODE_LEVEL_INDIRECT_CHILD; } - $priority = $from->priority ? $from->priority : TOOL_CRAWLER_PRIORITY_DEFAULT; + $priority = isset($from->priority) ? $from->priority : TOOL_CRAWLER_PRIORITY_DEFAULT; + $courseid = isset($from->courseid) ? $from->courseid : null; // Add the node URL to the queue. - $to = $this->mark_for_crawl($from->url, $url, $from->courseid, $priority, $level); + $to = $this->mark_for_crawl($from->url, $url, $courseid, $priority, $level); if ($to === false) { return false; } diff --git a/tests/phpunit/robot_crawler_test.php b/tests/phpunit/robot_crawler_test.php index 84a2dc46..08c80d28 100644 --- a/tests/phpunit/robot_crawler_test.php +++ b/tests/phpunit/robot_crawler_test.php @@ -295,6 +295,7 @@ public function test_should_be_excluded() { $node->contents = $page . $linktoexclude; $node->url = $url; $node->id = $insertid; + $node->level = TOOL_CRAWLER_NODE_LEVEL_PARENT; $this->resetAfterTest(true); @@ -310,6 +311,67 @@ public function test_should_be_excluded() { self::assertFalse($found); } + /** + * Test for issue #108 - passing node crawl priority to child nodes when parsing html. + */ + public function test_parse_html_priority_inheritance() { + global $CFG, $DB; + + $parentlocalurl = 'course/view.php?id=1§ion=2'; + $directchildlocalurl = 'mod/book/view.php?id=7'; + $indirectchildexternalurl = 'http://someexternalsite.net.au'; + $nodes = []; + + // Internal parent node. + $node = $this->robot->mark_for_crawl($CFG->wwwroot, $parentlocalurl, 1, TOOL_CRAWLER_PRIORITY_HIGH); + $node->httpcode = 200; + $node->mimetype = 'text/html'; + $node->external = 0; + $node->contents = << + +
+ +