-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdump-cpan-modules-for-author.pl
41 lines (35 loc) · 1.24 KB
/
dump-cpan-modules-for-author.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/bin/perl -w
# dump-cpan-modules-for-author - display modules a CPAN author owns
use LWP::Simple;
use URI;
use HTML::TableContentParser;
use HTML::Entities;
use strict;
# Page to scrape: the first command-line argument, or a default author page.
our $URL = shift || 'http://search.cpan.org/author/TOMC/';

# Fetch the page and parse every HTML table on it.
my $tables = get_tables($URL);

# The module listing is read from the fifth table (index 4) of the page.
my $modules = $tables->[4];

# Print one record per row: name and link, then status and description.
foreach my $r (@{ $modules->{rows} }) {
    my ($module_name, $module_link, $status, $description)
        = parse_module_row($r, $URL);
    print "$module_name <$module_link>\n\t$status\n\t$description\n\n";
}
# get_tables - download a page and parse the HTML tables it contains
#
# Arguments:
#   $URL - address of the page to fetch
#
# Returns the result of HTML::TableContentParser's parse() on the page
# content (the caller indexes it as an array reference of tables).
# Dies if the page cannot be fetched.
sub get_tables {
    my $URL  = shift;
    my $page = get($URL);

    # LWP::Simple::get returns undef on failure; stop here with a clear
    # message rather than handing undef to the parser.
    die "Could not fetch $URL\n" unless defined $page;

    my $tcp = HTML::TableContentParser->new;
    return $tcp->parse($page);
}
# parse_module_row - extract the fields of one row of the module table
#
# Arguments:
#   $row - hash reference for one table row; the code reads
#          $row->{cells}[0]{data} (HTML of the module link),
#          $row->{cells}[1]{data} (status) and
#          $row->{cells}[2]{data} (description)
#   $URL - base URL against which a relative module link is resolved
#
# Returns the list ($module_name, $module_link, $status, $description),
# where $module_link is the object returned by URI->new_abs.
sub parse_module_row {
    my ($row, $URL) = @_;

    # extract cells
    my $module_html = $row->{cells}[0]{data};
    my $status      = $row->{cells}[1]{data};
    my $description = $row->{cells}[2]{data};

    # strip any markup from the status cell
    $status =~ s{<.*?>}{}g;

    # first capture is the href target, second is the link text
    my ($module_link, $module_name) = $module_html =~ m{href="(.*?)".*?>(.*)<}i;
    $module_link = URI->new_abs($module_link, $URL);    # resolve relative links

    # clean up entities; called in void context, decode_entities modifies
    # its argument in place
    decode_entities($module_name);
    decode_entities($description);

    return ($module_name, $module_link, $status, $description);
}