diff --git a/admin/MBImport.pl b/admin/MBImport.pl index 8104204ba7e..c5ecec5dd16 100755 --- a/admin/MBImport.pl +++ b/admin/MBImport.pl @@ -10,6 +10,10 @@ use Getopt::Long; use DBDefs; use Sql; +use MusicBrainz::Script::Utils qw( + copy_table_from_file + is_table_empty +); use MusicBrainz::Server::Replication qw( :replication_type ); use MusicBrainz::Server::Constants qw( @FULL_TABLE_LIST ); @@ -17,7 +21,6 @@ my ($fHelp, $fIgnoreErrors); my $tmpdir = '/tmp'; -my $fProgress = -t STDOUT; my $fFixUTF8 = 0; my $skip_ensure_editor = 0; my $update_replication_control = 1; @@ -214,115 +217,25 @@ sub usage exit($errors ? 1 : 0); - - sub ImportTable { my ($table, $file) = @_; - print localtime() . " : load $table\n"; - - my $rows = 0; - - my $t1 = [gettimeofday]; - my $interval; - - my $size = -s($file) - or return 1; - - my $p = sub { - my ($pre, $post) = @_; - no integer; - printf $pre.'%-30.30s %9d %3d%% %9d'.$post, - $table, $rows, int(100 * tell(LOAD) / $size), - $rows / ($interval||1); - }; - - $OUTPUT_AUTOFLUSH = 1; - - eval - { - # open in :bytes mode (always keep byte octets), to allow fixing of invalid - # UTF-8 byte sequences in --fix-broken-utf8 mode. - # in default mode, the Pg driver will take care of the UTF-8 transformation - # and croak on any invalid UTF-8 character - open(LOAD, '<:bytes', $file) or die "open $file: $OS_ERROR"; - - # If you're looking at this code because your import failed, maybe - # with an error like this: - # ERROR: copy: line 1, Missing data for column "automodsaccepted" - # then the chances are it's because the data you're trying to load - # doesn't match the structure of the database you're trying to load it - # into. Please make sure you've got the right copy of the server - # code, as described in the INSTALL file. 
- - $sql->begin; - $sql->do("DELETE FROM $table") if $delete_first; - my $dbh = $sql->dbh; # issues a ping, must be done before COPY - $sql->do("COPY $table FROM stdin"); - - $p->('', '') if $fProgress; - my $t; - - use Encode; - while (<LOAD>) - { - $t = $_; - if ($fFixUTF8) { - # replaces any invalid UTF-8 character with special 0xFFFD codepoint - # and warn on any such occurence - $t = Encode::decode('UTF-8', $t, Encode::FB_DEFAULT | Encode::WARN_ON_ERR); - } else { - $t = Encode::decode('UTF-8', $t, Encode::FB_CROAK); - } - if (!$dbh->pg_putcopydata($t)) - { - print 'ERROR while processing: ', $t; - die; - } - - ++$rows; - unless ($rows & 0xFFF) - { - $interval = tv_interval($t1); - $p->("\r", '') if $fProgress; - } - } - $dbh->pg_putcopyend() or die; - $interval = tv_interval($t1); - $p->(($fProgress ? "\r" : ''), sprintf(" %.2f sec\n", $interval)); - - close LOAD - or die $OS_ERROR; - - $sql->commit; - - die 'Error loading data' - if -f $file and empty($table); + my $rows = copy_table_from_file( + $sql, $table, $file, + delete_first => $delete_first, + fix_utf8 => $fFixUTF8, + ignore_errors => $fIgnoreErrors, + ); + if ($rows) { ++$tables; $totalrows += $rows; - - 1; - }; - - return 1 unless $EVAL_ERROR; - warn "Error loading $file: $EVAL_ERROR"; - $sql->rollback; - - ++$errors, return 0 if $fIgnoreErrors; - exit 1; -} - -sub empty -{ - my $table = shift; - - my $any = $sql->select_single_value( - "SELECT 1 FROM $table LIMIT 1", - ); - - not defined $any; + return 1; + } else { + ++$errors; + return 0; + } } sub ImportAllTables @@ -342,7 +255,7 @@ sub ImportAllTables { my $basetable = $1; - if (not empty($basetable) and not $delete_first) + if (not is_table_empty($sql, $basetable) and not $delete_first) { warn "$basetable table already contains data; skipping $table\n"; next; @@ -352,7 +265,7 @@ sub ImportAllTables ImportTable($basetable, $file) or next; } else { - if (not empty($table) and not $delete_first) + if (not is_table_empty($sql, $table) and not 
$delete_first) { warn "$table already contains data; skipping\n"; next; diff --git a/admin/replication/ImportReplicationChanges b/admin/replication/ImportReplicationChanges index 6eb20718382..6608d772f4f 100755 --- a/admin/replication/ImportReplicationChanges +++ b/admin/replication/ImportReplicationChanges @@ -9,6 +9,10 @@ use FindBin; use lib "$FindBin::Bin/../../lib"; use Getopt::Long; +use MusicBrainz::Script::Utils qw( + copy_table_from_file + is_table_empty +); use MusicBrainz::Server::Context; use DBDefs; use Sql; @@ -104,83 +108,19 @@ sub ImportTable { my ($table, $file) = @_; - print localtime() . " : load $table\n"; - - my $rows = 0; - - my $t1 = [gettimeofday]; - my $interval; - - my $size = -s($file) || 1; - - my $p = sub { - my ($pre, $post) = @_; - no integer; - printf $pre.'%-30.30s %9d %3d%% %9d'.$post, - $table, $rows, int(100 * tell(LOAD) / $size), - $rows / ($interval||1); - }; - - $OUTPUT_AUTOFLUSH = 1; - - eval - { - open(LOAD, '<:encoding(utf8)', $file) or die "open $file: $OS_ERROR"; - - $sql->begin; - my $dbh = $sql->dbh; # issues a ping, must be done before COPY - $sql->do("COPY $table FROM stdin"); - - $p->('', ''); - - while (<LOAD>) - { - $dbh->pg_putcopydata($_) or die; - - ++$rows; - unless ($rows & 0xFFF) - { - $interval = tv_interval($t1); - $p->("\r", ''); - } - } - - $dbh->pg_putcopyend() or die; - - $interval = tv_interval($t1); - $p->("\r", sprintf(" %.2f sec\n", $interval)); - - close LOAD - or die $OS_ERROR; - - $sql->commit; - - die 'Error loading data' - if -f $file and empty($table); + my $rows = copy_table_from_file( + $sql, $table, $file, + ignore_errors => $fIgnoreErrors, + ); + if ($rows) { ++$tables; $totalrows += $rows; - - 1; - }; - - return 1 unless $EVAL_ERROR; - warn "Error loading $file: $EVAL_ERROR"; - $sql->rollback; - - ++$errors, return 0 if $fIgnoreErrors; - exit 1; -} - -sub empty -{ - my $table = shift; - - my $any = $sql->select_single_value( - "SELECT 1 FROM $table LIMIT 1", - ); - - not defined $any; + return 
1; + } else { + ++$errors; + return 0; + } } sub ImportReplicationTables @@ -197,7 +137,7 @@ sub ImportReplicationTables my $file = find_file($table); $file or print("No data file found for '$table', skipping\n"), die; - if (not empty($table)) + if (not is_table_empty($sql, $table)) { die "$table already contains data; skipping\n"; next; @@ -226,7 +166,7 @@ sub ImportDBMirror2ReplicationTables { } my $qualified_table = "dbmirror2.$table"; - if (!empty($qualified_table)) { + if (!is_table_empty($sql, $qualified_table)) { die "$qualified_table already contains data"; } diff --git a/lib/MusicBrainz/Script/Utils.pm b/lib/MusicBrainz/Script/Utils.pm index 407e8306c23..5f0260db48b 100644 --- a/lib/MusicBrainz/Script/Utils.pm +++ b/lib/MusicBrainz/Script/Utils.pm @@ -2,19 +2,133 @@ package MusicBrainz::Script::Utils; use strict; use warnings; +use Encode; use English; +use Time::HiRes qw( gettimeofday tv_interval ); use feature 'state'; use base 'Exporter'; our @EXPORT_OK = qw( + copy_table_from_file get_primary_keys get_foreign_keys + is_table_empty log retry ); +=sub copy_table_from_file + +Imports C<$file> into C<$table> via PostgreSQL's C<COPY> statement. + +Returns the number of rows imported. + +=cut + +sub copy_table_from_file { + my ($sql, $table, $file, %opts) = @_; + + my $delete_first = $opts{delete_first}; + my $fix_utf8 = $opts{fix_utf8}; + my $ignore_errors = $opts{ignore_errors}; + my $quiet = $opts{quiet}; + my $show_progress = !$quiet && ($opts{show_progress} // (-t STDOUT)); + + print localtime() . 
" : load $table\n" + unless $quiet; + + my $rows = 0; + my $t1 = [gettimeofday]; + my $interval; + + my $size = -s($file) + or return 1; + + my $p = sub { + my ($pre, $post) = @_; + no integer; + printf $pre.'%-30.30s %9d %3d%% %9d'.$post, + $table, $rows, int(100 * tell(LOAD) / $size), + $rows / ($interval || 1); + }; + + $OUTPUT_AUTOFLUSH = 1; + + eval { + # Open in :bytes mode (always keep byte octets), to allow fixing of + # invalid UTF-8 byte sequences in --fix-broken-utf8 mode. + # In default mode, the Pg driver will take care of the UTF-8 + # transformation and croak on any invalid UTF-8 character. + open(LOAD, '<:bytes', $file) or die "open $file: $OS_ERROR"; + + # If you're looking at this code because your import failed, maybe + # with an error like this: + # ERROR: copy: line 1, Missing data for column "automodsaccepted" + # then the chances are it's because the data you're trying to load + # doesn't match the structure of the database you're trying to load + # it into. Please make sure you've got the right copy of the server + # code, as described in the INSTALL file. + + $sql->begin; + $sql->do("DELETE FROM $table") if $delete_first; + + my $dbh = $sql->dbh; # issues a ping, must be done before COPY + $sql->do("COPY $table FROM stdin"); + + $p->('', '') if $show_progress; + + my $t; + while (<LOAD>) { + $t = $_; + if ($fix_utf8) { + # Replaces any invalid UTF-8 character with special 0xFFFD + # codepoint and warn on any such occurrence. + $t = Encode::decode('UTF-8', $t, + Encode::FB_DEFAULT | + Encode::WARN_ON_ERR); + } else { + $t = Encode::decode('UTF-8', $t, Encode::FB_CROAK); + } + if (!$dbh->pg_putcopydata($t)) { + print 'ERROR while processing: ', $t; + die; + } + + ++$rows; + unless ($rows & 0xFFF) { + $interval = tv_interval($t1); + $p->("\r", '') if $show_progress; + } + } + + $dbh->pg_putcopyend or die; + + $interval = tv_interval($t1); + $p->(($show_progress ? 
"\r" : ''), + sprintf(" %.2f sec\n", $interval)) + unless $quiet; + + close LOAD + or die $OS_ERROR; + + $sql->commit; + + die 'Error loading data' + if -f $file and is_table_empty($sql, $table); + + 1; + }; + + return $rows unless $EVAL_ERROR; + warn "Error loading $file: $EVAL_ERROR"; + $sql->rollback; + + return 0 if $ignore_errors; + exit 1; +} + =sub get_foreign_keys Get a list of foreign key columns for (C<$schema>, C<$table>). @@ -119,6 +233,20 @@ sub get_primary_keys($$$) { return @keys; } +=sub is_table_empty + +Returns whether C<$table> is empty. + +=cut + +sub is_table_empty { + my ($sql, $table) = @_; + + not defined $sql->select_single_value(<<~"SQL"); + SELECT 1 FROM $table LIMIT 1; + SQL +} + =sub log Log a message to stdout, prefixed with the local time and ending with a