Skip to content

Commit

Permalink
made bin/wantedpages.php more flexible
Browse files Browse the repository at this point in the history
* results can be sorted by wanted or origin
* second dimension can be skipped
* results should be easier to parse with standard unix tools now
* use proper options, not commands
  • Loading branch information
splitbrain committed Jul 28, 2017
1 parent ebd3919 commit 64cebf7
Showing 1 changed file with 57 additions and 42 deletions.
99 changes: 57 additions & 42 deletions bin/wantedpages.php
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
#!/usr/bin/php
<?php
// Point DOKU_INC at the directory one level above this script, unless the
// caller has already defined it.
if(!defined('DOKU_INC')) define('DOKU_INC', realpath(dirname(__FILE__) . '/../') . '/');
// Defined before init.php is loaded; presumably tells the bootstrap not to
// start a session for this CLI run - confirm against inc/init.php.
define('NOSESSION', 1);
require_once(DOKU_INC . 'inc/init.php');

/**
* Find wanted pages
*/
class WantedPagesCLI extends DokuCLI {

// Classification codes returned by dir_filter() and consumed by get_pages()
const DIR_CONTINUE = 1; // entry is skipped
const DIR_NS = 2;       // entry is descended into as a sub directory
const DIR_PAGE = 3;

// Flag of the former "show-pages" command; kept declared for any code that
// still reads it - drop once nothing references it anymore.
private $show_pages = false;

// true: print only the first column of each result line
private $skip = false;
// 'wanted' or 'origin': which page id is used as the key of $result
private $sort = 'wanted';

// collected links: key page id => list of page ids of the other dimension
private $result = array();

/**
* Register options and arguments on the given $options object
*
Expand All @@ -21,18 +26,27 @@ class WantedPagesCLI extends DokuCLI {
*/
protected function setup(DokuCLI_Options $options) {
    // Help text for the script as a whole.
    $options->setHelp(
        'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
        '(the pages that are linking to these missing pages).'
    );

    // Positional namespace argument. It is registered with a third argument of
    // false (presumably the "required" flag - confirm against DokuCLI_Options);
    // main() falls back to the root namespace when no argument is given.
    $options->registerArgument(
        'namespace',
        'The namespace to lookup. Defaults to root namespace',
        false
    );

    // --sort / -s takes a value; main() treats 'origin' specially.
    $options->registerOption(
        'sort',
        'Sort by wanted or origin page',
        's',
        '(wanted|origin)'
    );

    // --skip / -k is a plain switch without a value.
    $options->registerOption(
        'skip',
        'Do not show the second dimension',
        'k'
    );
}

/**
Expand All @@ -46,28 +60,30 @@ protected function setup(DokuCLI_Options $options) {
protected function main(DokuCLI_Options $options) {
    // Translate the namespace into a directory: build the file name of a
    // dummy page ('xxx') inside that namespace and take its directory.
    if($options->args) {
        $startdir = dirname(wikiFN($options->args[0] . ':xxx'));
    } else {
        $startdir = dirname(wikiFN('xxx'));
    }

    $this->skip = (bool) $options->getOpt('skip');
    // Only an explicit 'origin' switches the grouping; every other value
    // (including the option not being given) keeps the 'wanted' default.
    $this->sort = ($options->getOpt('sort') === 'origin') ? 'origin' : 'wanted';

    $this->info("searching $startdir");

    // Start from an empty result so that a repeated call does not accumulate.
    $this->result = array();
    foreach($this->get_pages($startdir) as $page) {
        $this->internal_links($page);
    }

    // One line per key when the second dimension is skipped, otherwise one
    // line per unique (key, sub) pair with the key padded to 40 characters.
    ksort($this->result);
    foreach($this->result as $main => $subs) {
        if($this->skip) {
            print "$main\n";
            continue;
        }

        $subs = array_unique($subs);
        sort($subs);
        foreach($subs as $sub) {
            printf("%-40s %s\n", $main, $sub);
        }
    }
}

Expand All @@ -82,7 +98,7 @@ protected function dir_filter($entry, $basepath) {
if($entry == '.' || $entry == '..') {
return WantedPagesCLI::DIR_CONTINUE;
}
if(is_dir($basepath.'/'.$entry)) {
if(is_dir($basepath . '/' . $entry)) {
if(strpos($entry, '_') === 0) {
return WantedPagesCLI::DIR_CONTINUE;
}
Expand All @@ -105,25 +121,25 @@ protected function get_pages($dir) {
static $trunclen = null;
if(!$trunclen) {
global $conf;
$trunclen = strlen($conf['datadir'].':');
$trunclen = strlen($conf['datadir'] . ':');
}

if(!is_dir($dir)) {
throw new DokuCLI_Exception("Unable to read directory $dir");
}

$pages = array();
$dh = opendir($dir);
$dh = opendir($dir);
while(false !== ($entry = readdir($dh))) {
$status = $this->dir_filter($entry, $dir);
if($status == WantedPagesCLI::DIR_CONTINUE) {
continue;
} else if($status == WantedPagesCLI::DIR_NS) {
$pages = array_merge($pages, $this->get_pages($dir.'/'.$entry));
$pages = array_merge($pages, $this->get_pages($dir . '/' . $entry));
} else {
$page = array(
'id' => pathID(substr($dir.'/'.$entry, $trunclen)),
'file' => $dir.'/'.$entry,
$page = array(
'id' => pathID(substr($dir . '/' . $entry, $trunclen)),
'file' => $dir . '/' . $entry,
);
$pages[] = $page;
}
Expand All @@ -133,35 +149,34 @@ protected function get_pages($dir) {
}

/**
 * Parse instructions and add the non-existing links to the result array
 *
 * Every internal link (and camelcase link, when $conf['camelcase'] is set)
 * whose target does not exist is recorded in $this->result: keyed by the
 * missing page when sorting by 'wanted', keyed by the linking page when
 * sorting by 'origin'.
 *
 * @param array $page array with page id and file path
 * @return array ids of the missing pages linked from this page (may contain
 *               duplicates); empty when the file could not be read
 */
function internal_links($page) {
    global $conf;

    $text = file_get_contents($page['file']);
    if($text === false) {
        // unreadable file: do not hand false to the parser
        return array();
    }

    $pid = $page['id'];
    $cns = getNS($pid);
    $exists = false;
    $links = array();

    foreach(p_get_instructions($text) as $ins) {
        $is_link = $ins[0] === 'internallink'
            || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
        if(!$is_link) {
            continue;
        }

        $mid = $ins[1][0];
        // $exists is read right after this call, so resolve_pageid() is
        // expected to update it (and $mid) in place.
        resolve_pageid($cns, $mid, $exists);
        if($exists) {
            continue;
        }

        list($mid) = explode('#', $mid); //record pages without hashes
        $links[] = $mid;

        if($this->sort === 'origin') {
            $this->result[$pid][] = $mid;
        } else {
            $this->result[$mid][] = $pid;
        }
    }

    return $links;
}
}

// Main: instantiate the CLI tool and run it exactly once
$cli = new WantedPagesCLI();
$cli->run();

0 comments on commit 64cebf7

Please sign in to comment.