From 2620c10d19b7b881135949a2fc65a1ff0b3c0379 Mon Sep 17 00:00:00 2001
From: Tim Landscheidt
Date: Tue, 11 Dec 2012 18:18:57 +0000
Subject: [PATCH] Remove trailing whitespace.
---
README | 8 +-
checkwiki.pl | 2202 +++++++++++++++++++++++++-------------------------
sql.txt | 66 +-
3 files changed, 1138 insertions(+), 1138 deletions(-)
diff --git a/README b/README
index 4dc6cec..b649c31 100644
--- a/README
+++ b/README
@@ -1,11 +1,11 @@
Abstract:
---------
-The WikiProject Check Wikipedia will help to
-clean the syntax of Wikipedia and help to find
-some other errors.
+The WikiProject Check Wikipedia will help to
+clean the syntax of Wikipedia and help to find
+some other errors.
-License
+License
-------
GPL3 license
diff --git a/checkwiki.pl b/checkwiki.pl
index 9b32799..a3d91fb 100644
--- a/checkwiki.pl
+++ b/checkwiki.pl
@@ -30,15 +30,15 @@
# delete_old_errors_in_db --> Problem with deleting of errors in loadmodus
# delete_deleted_article_from_db --> Problem old articles
-
+
#################################################################
# Load Module
#################################################################
#use lib "C:/perl/lib";
-
+
use URI::Escape;
use LWP::UserAgent;
-
+
#use CGI::Carp qw(fatalsToBrowser);
#use lib '/home/sk/perl/checkwiki';
@@ -48,10 +48,10 @@
#}
# use new_coordinates;
-
+
#use lib '../module';
#use wikipedia;
-
+
#use URI::Escape;
#use LWP::UserAgent;
@@ -60,13 +60,13 @@
#################################################################
our $dump_directory = '/mnt/user-store/dumps/'; # toolserver
# our $dump_directory = '../../dump/'; # home or usb
-
+
our $output_directory = '/mnt/user-store/sk/data/checkwiki/';
our $input_directory_new = '/mnt/user-store/sk/data/new_article/';
our $input_directory_change = '/mnt/user-store/sk/data/last_changes/';
our $output_templatetiger = '/mnt/user-store/sk/data/templatetiger/';
our $output_geo = '/mnt/user-store/sk/data/geo/';
-
+
#our $dump_filename = '/mnt/user-store/dump/dewiki-20080607-pages-articles.xml'; #'Wikipedia-20080502083556.xml';
our $dump_filename = '';
#$dump_filename ='../../dump/dewiki-20071217-pages-articles.xml';
@@ -74,14 +74,14 @@
#################################################################
# Declaration of variables (global)
#################################################################
-
+
our $quit_program = 'no'; # quit the program (yes,no), for quit the programm in an emergency
our $quit_reason = ''; # quit the program reason
our $test_programm = 'true'; # only for program tests
-
+
our $dump_or_live = ''; # scan modus (dump, live, only)
- our $silent_modus = ''; # silent modus (very low output at screen) for batch
- our $test_modus = ''; # silent modus (very low output at screen) for batch
+ our $silent_modus = ''; # silent modus (very low output at screen) for batch
+ our $test_modus = ''; # silent modus (very low output at screen) for batch
our $starter_modus = ''; # to update in the loadmodus the cw_starter table
our $load_modus_done = 'yes'; # done article from db
@@ -89,8 +89,8 @@
our $load_modus_dump = 'yes'; # new article from db
our $load_modus_last_change = 'yes'; # last_change article from db
our $load_modus_old = 'yes'; # old article from db
-
-
+
+
our $details_for_page = 'no'; # yes/no durring the scan you can get more details for a article scan
@@ -100,28 +100,28 @@
our $line_number = 0; # number of line in dump
our $project = ''; # name of the project 'dewiki'
- our $language = ''; # language of dump 'de', 'en';
+ our $language = ''; # language of dump 'de', 'en';
our $page_number = 0; # number of pages in namesroom 0
our $base = ''; # base of article, 'http://de.wikipedia.org/wiki/Hauptseite'
our $home = ''; # base of article, 'http://de.wikipedia.org/wiki/'
-
+
our @namespace; # namespace values
# 0 number
# 1 namespace in project language
# 2 namespace in english language
- our $namespaces_count = -1; # number of namespaces
+ our $namespaces_count = -1; # number of namespaces
our @namespacealiases; # namespacealiases values
# 0 number
- # 1 namespacealias
- our $namespacealiases_count= -1; # number of namespacealiases
-
+ # 1 namespacealias
+ our $namespacealiases_count= -1; # number of namespacealiases
+
our @namespace_cat; #all namespaces for categorys
our @namespace_image; #all namespaces for images
our @namespace_templates; #all namespaces for templates
-
+
our @magicword_defaultsort;
-
+
our @magicword_img_thumbnail;
our @magicword_img_manualthumb;
our @magicword_img_right;
@@ -144,10 +144,10 @@
our @magicword_img_middle;
our @magicword_img_bottom;
our @magicword_img_text_bottom;
-
-
+
+
# Wiki-special variables
-
+
our @live_article; # to-do-list for live (all articles to scan)
our $current_live_article = -1; # line_number_of_current_live_article
our $number_of_live_tests = -1; # Number of articles for live test
@@ -158,7 +158,7 @@
our @article_was_scanned; #if an article was scanned, this will insert here
our $xml_text_from_api = ''; # the text from more then one articles from the API
-
+
our $error_counter = -1; # number of found errors in all article
our @error_description; # Error Description
@@ -174,19 +174,19 @@
# 9 XHTML translation title
# 10 XHTML translation description
- our $number_of_error_description = -1; # number of error_description
+ our $number_of_error_description = -1; # number of error_description
+
-
our $max_error_count = 50; # maximum of shown article per error
our $maximum_current_error_scan = -1; # how much shold be scanned for reach the max_error_count
our $rest_of_errors_not_scan_yet = '';
our $number_of_all_errors_in_all_articles = 0; #all errors
-
+
our $for_statistic_new_article = 0;
our $for_statistic_last_change_article = 0;
our $for_statistic_geo_article = 0;
our $for_statistic_number_of_articles_with_error = 0;
-
+
###########################
@@ -213,7 +213,7 @@
our $log_file = 'log.txt';
our $templatetiger_filename = '';
-
+
our @inter_list = ( 'af', 'als', 'an', 'ar',
'bg', 'bs',
'ca', 'cs', 'cy',
@@ -226,7 +226,7 @@
'ja', 'jv',
'ka', 'ko',
'la', 'lb', 'lt',
- 'ms',
+ 'ms',
'nds', 'nds_nl', 'nl', 'nn', 'no',
'pl', 'pt',
'ro', 'ru',
@@ -237,8 +237,8 @@
'yi',
'zh'
);
-
- our @foundation_projects = ( 'wikibooks', 'b',
+
+ our @foundation_projects = ( 'wikibooks', 'b',
'wiktionary', 'wikt',
'wikinews', 'n',
'wikiquote', 'q',
@@ -256,28 +256,28 @@
'nost',
'testwiki'
);
-
+
# current time
our ($akSekunden, $akMinuten, $akStunden, $akMonatstag, $akMonat,
$akJahr, $akWochentag, $akJahrestag, $akSommerzeit) = localtime(time);
our $CTIME_String = localtime(time);
$akMonat = $akMonat + 1;
- $akJahr = $akJahr + 1900;
+ $akJahr = $akJahr + 1900;
$akMonat = "0".$akMonat if ($akMonat<10);
$akMonatstag = "0".$akMonatstag if ($akMonatstag<10);
$akStunden = "0".$akStunden if ($akStunden<10);
$akMinuten = "0".$akMinuten if ($akMinuten<10);
-
-
+
+
our $translation_page = ''; # name of the page with translation for example in de: "Wikipedia:WikiProject Check Wikipedia/Übersetzung"
-
+
our $start_text = '';
$start_text = $start_text ."The WikiProject '''Check Wikipedia''' will help to clean up the syntax of Wikipedia and to find some other errors.\n";
$start_text = $start_text ."\n";
$start_text = $start_text ."'''Betatest''' - At the moment the script has some bugs and not every error on this page is an actual error. \n";
- $start_text = $start_text ."\n";
-
-
+ $start_text = $start_text ."\n";
+
+
our $description_text = '';
$description_text = $description_text ."== Project description in English == \n";
@@ -310,10 +310,10 @@
$description_text = $description_text ."** insert an article by hand since it will disappear from the list with the next automatic update of this page. \n";
$description_text = $description_text ."** try to fix spelling mistakes within this page since all manual changes will disappear as well with the next update. Instead, send an e-mail or message to the author so he can fix the spelling in the script. \n";
$description_text = $description_text ."\n";
-
-
- our $category_text = '';
-
+
+
+ our $category_text = '';
+
our $top_priority_script = 'Top priority';
our $top_priority_project = '';
our $middle_priority_script = 'Middle priority';
@@ -321,11 +321,11 @@
our $lowest_priority_script = 'Lowest priority';
our $lowest_priority_project = '';
-
+
our $dbh; # DatenbaaseHandler
-
-
-
+
+
+
###############################
# variables for one article
###############################
@@ -337,10 +337,10 @@
our $text = ''; # text of the current article (for work)
our $text_origin = ''; # text of the current article origin (for save)
our $text_without_comments = ''; # text of the current article without_comments (for save)
-
+
our $page_namespace = -100; # namespace of page
- our $page_is_redirect = 'no';
+ our $page_is_redirect = 'no';
our $page_is_disambiguation = 'no';
our $page_categories = '';
@@ -352,33 +352,33 @@
our @comments; # 0 pos_start
# 1 pos_end
# 2 comment
- our $comment_counter = -1; #number of comments in this page
-
+ our $comment_counter = -1; #number of comments in this page
+
our @category; # 0 pos_start
# 1 pos_end
- # 2 category Test
+ # 2 category Test
# 3 linkname Linkname
# 4 original [[Category:Test|Linkname]]
-
+
our $category_counter = -1;
our $category_all = ''; # all categries
our @interwiki; # 0 pos_start
# 1 pos_end
- # 2 interwiki Test
+ # 2 interwiki Test
# 3 linkname Linkname
# 4 original [[de:Test|Linkname]]
# 5 language
-
+
our $interwiki_counter = -1;
- our @lines; # text seperated in lines
+ our @lines; # text seperated in lines
our @headlines; # headlines
our @section; # text between headlines
undef(@section);
-
+
our @lines_first_blank; # all lines where the first character is ' '
-
+
our @templates_all; # all templates
our @template; # templates with values
# 0 number of template
@@ -386,8 +386,8 @@
# 2 template_row
# 3 attribut
# 4 value
- our $number_of_template_parts = -1; # number of all template parts
-
+ our $number_of_template_parts = -1; # number of all template parts
+
our @links_all; # all links
our @images_all; # all images
our @isbn; # all ibsn of books
@@ -395,43 +395,43 @@
our $page_has_geo_error = 'no'; # yes/no geo error in this page
our $page_geo_error_number = -1; # number of all article for this page
-
+
our $end_of_dump = 'no'; # when last article from dump scan then 'yes', else 'no'
our $end_of_live = 'no'; # when last article from live scan then 'yes', else 'no'
-
-
+
+
check_input_arguments();
open_db();
open_file() if ($quit_program eq 'no'); # logfile, dumpfile, metadata (API, File)
-
+
get_error_description() if ($quit_program eq 'no'); # all errordescription from this script
- load_text_translation() if ($quit_program eq 'no'); # load translation from wikipage
+ load_text_translation() if ($quit_program eq 'no'); # load translation from wikipage
output_errors_desc_in_db() if ($quit_program eq 'no'); # update the database with newest error description
output_text_translation_wiki() if ($quit_program eq 'no'); # output the new wikipage for translation
-
+
load_article_for_live_scan() if ($quit_program eq 'no'); # only for live
scan_pages() if ($quit_program eq 'no'); # scan all aricle
close_file(); # close dump or templatetiger-file
-
+
update_table_cw_error_from_dump() if ($quit_program eq 'no');
- delete_deleted_article_from_db() if ($quit_program eq 'no');
- delete_article_from_table_cw_new() if ($quit_program eq 'no');
- delete_article_from_table_cw_change() if ($quit_program eq 'no');
+ delete_deleted_article_from_db() if ($quit_program eq 'no');
+ delete_article_from_table_cw_new() if ($quit_program eq 'no');
+ delete_article_from_table_cw_change() if ($quit_program eq 'no');
update_table_cw_starter();
-
+
#output_errors() if ($quit_program eq 'no');
output_little_statistic() if ($quit_program eq 'no'); # print counter of found errors
output_duration() if ($quit_program eq 'no'); # print time at the end
- print $quit_reason if ($quit_reason ne '');
+ print $quit_reason if ($quit_reason ne '');
close_db();
close_logfile();
print 'finish'."\n";
-
+
#################################################################
#################################################################
@@ -444,7 +444,7 @@ sub get_time_string{
my ($aakSekunden, $aakMinuten, $aakStunden, $aakMonatstag, $aakMonat,
$aakJahr, $aakWochentag, $aakJahrestag, $aakSommerzeit) = localtime(time);
$aakMonat = $aakMonat + 1;
- $aakJahr = $aakJahr + 1900;
+ $aakJahr = $aakJahr + 1900;
$aakMonat = "0".$aakMonat if ($aakMonat<10);
$aakMonatstag = "0".$aakMonatstag if ($aakMonatstag<10);
$aakStunden = "0".$aakStunden if ($aakStunden<10);
@@ -475,7 +475,7 @@ sub check_input_arguments{
$language = $project;
$language =~ s/source$//;
$language =~ s/wiki$//;
-
+
}
}
if ($found_argv eq 'no'){
@@ -483,7 +483,7 @@ sub check_input_arguments{
$quit_reason = $quit_reason. 'no project name, for example: "p=dewiki"'."\n\n";
$quit_program = 'yes';
}
-
+
####################
#check argument value for scanmodus
$found_argv = 'no';
@@ -491,7 +491,7 @@ sub check_input_arguments{
my $current_argv = $_;
if ( $current_argv eq 'm=dump'
or $current_argv eq 'm=live'
- or $current_argv eq 'm=only' )
+ or $current_argv eq 'm=only' )
{
$found_argv = 'yes';
$dump_or_live = $current_argv;
@@ -502,8 +502,8 @@ sub check_input_arguments{
#no scan modus
$quit_reason = $quit_reason. 'modus unknown, for example: "m=dump/live/only"'."\n\n";
$quit_program = 'yes';
- }
-
+ }
+
####################
#check argument value for silent or test
$found_argv = 'no';
@@ -513,7 +513,7 @@ sub check_input_arguments{
$silent_modus = 'silent' if ( $current_argv eq 'silent' );
$test_modus = 'test' if ( $current_argv eq 'test');
$starter_modus = 'starter' if ( $current_argv eq 'starter');
-
+
if ( index($current_argv,'load=')==0 and $dump_or_live eq 'live' ) {
#print 'loadmodus'."\n";
#print "\t".'Load_modus='.$current_argv."\n";
@@ -522,12 +522,12 @@ sub check_input_arguments{
$load_modus_dump = 'no' if (index($current_argv, 'dump') == -1) ; # new article from db
$load_modus_last_change = 'no' if (index($current_argv, 'last_change') == -1) ; # last_change article from db
$load_modus_old = 'no' if (index($current_argv, 'old') == -1) ; # old article from db
-
-
+
+
}
}
-
+
if ($quit_program eq 'yes'){
#End of Script, because no correct parameter
$quit_reason = $quit_reason.'Use for scan a dump'."\n";
@@ -542,17 +542,17 @@ sub check_input_arguments{
$quit_reason = $quit_reason.'perl -w checkwiki.pl p=dewiki m=live silent update_error_desc'."\n";
$quit_reason = $quit_reason.'perl -w checkwiki.pl p=dewiki m=live load=new/done/dumpscan/lastchange/old limit=500'."\n"; #starter modus
$quit_reason = $quit_reason."\n";
-
+
} else {
-
+
# All parameters available and correct
- # extract parameters
+ # extract parameters
print "\n";
if ($silent_modus ne 'silent') {
print '##################################################'."\n";
print '######## checkwiki.pl - Version 0.21 ########'."\n";
-
+
}
print '##################################################'."\n";
print 'Start: '."\t\t".$akJahr.'-'.$akMonat.'-'.$akMonatstag.' '.$akStunden.':'.$akMinuten."\n";
@@ -564,15 +564,15 @@ sub check_input_arguments{
print 'scan a dump only some errors' if ($dump_or_live eq 'only');
print ')'."\n";
}
-
+
if ($test_modus eq 'test') { #modus only for test
$project = $project.'_test';
print "\t\t\t".'Test-Modus --> '.$project.'!!!'."\n";
}
-
+
}
-
-
+
+
}
@@ -609,7 +609,7 @@ sub open_db{
AutoCommit => 1
}
) or die "Database connection not made: $DBI::errstr" . DBI->errstr;
- } else {
+ } else {
$dbh = DBI->connect( 'DBI:mysql:u_sk_yarrow:host=sql', # Toolserver
'sk',
$password ,
@@ -618,8 +618,8 @@ sub open_db{
AutoCommit => 1
}
) or die "Database connection not made: $DBI::errstr" . DBI->errstr;
- }
-
+ }
+
$password = '';
@@ -645,14 +645,14 @@ sub get_error_description{
error_list('get_description');
# count the number of error description
-
+
$number_of_error_description = 1; # first error is error with number 1
while (defined($error_description[$number_of_error_description][1]) ) {
#print $number_of_error_description.' '. $error_description[$number_of_error_description][1]."\n";
$number_of_error_description = $number_of_error_description + 1;
}
-
-
+
+
# set all known error description to a basic level
for (my $i = 1; $i <= $number_of_error_description; $i++) {
#$error_description[$i][0] = -1; # set in error
@@ -666,11 +666,11 @@ sub get_error_description{
$error_description[$i][8] = 0;
$error_description[$i][9] = '';
$error_description[$i][10] = '';
-
+
}
my $output_number = $number_of_error_description -1;
print $output_number .' error description in script'."\n" if ($silent_modus ne 'silent');
-
+
}
@@ -692,19 +692,19 @@ sub open_file{
# open logfile
my $log_filename = $output_directory.$project.'/'.$project.'_'.$log_file;
open (LOGFILE, '+>'.$log_filename) if ($starter_modus ne 'starter');
-
-
-
+
+
+
################################
# if new dump is available
if ($dump_or_live eq 'dump') {
$dump_filename = search_for_last_dump();
print 'Dump_filename:'."\t\t".$dump_filename."\n" if ($silent_modus ne 'silent');
-
-
+
+
my $last_dump_filename = $output_directory.$project.'/'.$project.'_last_dump_name.txt';
print $last_dump_filename."\n";
-
+
if (not (-e $last_dump_filename)) {
# create the file if not exist
system ('touch '.$last_dump_filename);
@@ -713,7 +713,7 @@ sub open_file{
print LAST_DUMP_NAME_FIRST 'x';
close(LAST_DUMP_NAME_FIRST);
}
-
+
#read the last name
#print 'check old dumpname'."\n";
open (LAST_DUMP_NAME, '<'.$last_dump_filename);
@@ -721,9 +721,9 @@ sub open_file{
$last_dump_name_old = <LAST_DUMP_NAME>;
#$last_dump_name_old = '' if not defined;
$last_dump_name_old =~ s/\n//g;
-
+
close(LAST_DUMP_NAME);
-
+
#get date from dumpfile
our $dump_date_for_output = $dump_filename;
$dump_date_for_output =~ s/^[^\-]-//g;
@@ -732,7 +732,7 @@ sub open_file{
$dump_date_for_output = substr($dump_date_for_output,0,4).'-'.substr($dump_date_for_output,4,2).'-'.substr($dump_date_for_output,6,2);
#print $dump_date_for_output."\n";
-
+
if ($dump_filename ne $last_dump_name_old ) {
# if not the newest dump then start dump scan
print 'Last: '."\t\t". $last_dump_name_old."\n";
@@ -746,8 +746,8 @@ sub open_file{
# system ('nice -n 5 perl -w checkwiki.pl p='.$project.' m=dump silent') ;
# print "\n\n";
# }
- }
-
+ }
+
#update last_dump time for project in database
my $sql_text = "update cw_project set last_dump ='".$dump_date_for_output."' where project = '". $project ."';";
my $sth = $dbh->prepare( $sql_text );
@@ -757,30 +757,30 @@ sub open_file{
my $sql_text2 = "delete from cw_dumpscan where project = '". $project ."';";
$sth = $dbh->prepare( $sql_text2 );
$sth->execute;
-
-
-
+
+
+
}
################################
-
-
-
+
+
+
if ($dump_or_live eq 'dump' or $dump_or_live eq 'only') {
-
+
#print "lsat=x".$dump_filename."x\n";
-
+
# check for existens dump
-
+
my $full_dump_path_filename = $dump_directory.$project.'/'.$dump_filename;
#print $full_dump_path_filename."\n";
if ($dump_filename ne '' and -e $full_dump_path_filename ) {
- #print 'Data: '."\t\t"."$dump_directory$dump_filename\n";
+ #print 'Data: '."\t\t"."$dump_directory$dump_filename\n";
#open dump
open(DUMP, "bzip2 -d -q <$full_dump_path_filename |");
read_and_write_metadata_from_dump();
@@ -788,7 +788,7 @@ sub open_file{
$quit_program = 'yes';
$quit_reason = $quit_reason. "file '$full_dump_path_filename'". " don't exist!\n";
}
-
+
# Templatetiger
$templatetiger_filename = $output_templatetiger.$project.'/'.$project.'_templatetiger.txt';
if (not (-e $output_templatetiger.$project )) {
@@ -799,12 +799,12 @@ sub open_file{
print 'Delete '.$templatetiger_filename."\n";
system ('rm -f '.$templatetiger_filename) ;
}
-
+
open (TEMPLATETIGER, '>>'.$templatetiger_filename);
-
+
#GEO Export
our $geo_export_filename = $output_geo.$project.'/'.$project.'_coordinates.txt';
@@ -816,9 +816,9 @@ sub open_file{
if (-e $geo_export_filename ) {
print 'Delete '.$geo_export_filename."\n";
system ('rm -f '.$geo_export_filename) ;
- }
+ }
}
-
+
# delete old error_list
if ($quit_program eq 'no' ) {
read_and_write_metadata_from_dump();
@@ -835,16 +835,16 @@ sub search_for_last_dump {
my $last_file ='';
my @xml_files = glob($dump_directory.'/'.$project.'/*-pages-articles.xml.bz2');
my $count_xml_files = @xml_files;
-
+
for (my $i = 0; $i < $count_xml_files; $i++) {
# List of all xml-files in dump_directory
my $byte = -s $xml_files[$i];
#print $xml_files[$i].' '.$byte."\n";
$xml_files[$i] =~ s/(.)+\///g;
-
+
my $project_test = $project;
$project_test =~ s/_test$//;
-
+
if (( index($xml_files[$i], $project.'-') == 0 # only this project
or index($xml_files[$i], $project_test.'-') == 0 ) #
and $byte > 0 ) { # only more then 0 bytes files
@@ -881,7 +881,7 @@ sub load_article_for_live_scan{
print 'create '.$output_directory.$project.'/'.$project.'_'.$error_list_filename. "\n";
system ('touch '.$output_directory.$project.'/'.$project.'_'.$error_list_filename);
-
+
} else {
#read articles(live)
@@ -891,34 +891,34 @@ sub load_article_for_live_scan{
# which are not scan_live - NEW: with table cw_dumpscan
get_oldest_article_from_database(250) if ($load_modus_old eq 'yes'); # get 250 article which are the date of last_scan is very old (dump_scan)
-
- #old
- #article_last_live_scan(); # get all article from last live scan, where the script found errors
- # very long in many languages (maybe later)
+
+ #old
+ #article_last_live_scan(); # get all article from last live scan, where the script found errors
+ # very long in many languages (maybe later)
# replace with done articles
- #article_with_error_from_dump_scan(); # get all articles error from the last dump scan
+ #article_with_error_from_dump_scan(); # get all articles error from the last dump scan
# replace with article_with_error_from_dump_scan2
- #article_with_error_from_dump_scan2() if ($load_modus_dump eq 'yes'); # get 250 articles of each error from the last dump scan,
+ #article_with_error_from_dump_scan2() if ($load_modus_dump eq 'yes'); # get 250 articles of each error from the last dump scan,
#geo_error_article(); # get all articles with geo errors last days
-
-
+
+
# sort all articles (new + live)
@live_article = sort(@live_article);
-
+
# delet all double/multi input article
$number_of_live_tests = @live_article;
#print $number_of_live_tests."\n";
my @new_live_article;
my @split_line;
my @split_line_old;
-
+
if ($number_of_live_tests > 0) {
my $old_title = '';
my $all_errors_of_this_article = '';
my $i = -1;
-
+
foreach (@live_article) {
@split_line_old = @split_line;
@@ -926,14 +926,14 @@ sub load_article_for_live_scan{
my $current_title = $split_line[0];
$split_line[1] =~ s/\n//;
#print $current_title."\n";
-
+
my $number_of_split_line = @split_line;
if ($number_of_split_line != 2) {
print 'Problem with input line:'."\n";
print $_."\n";
die;
};
-
+
if ($old_title ne $current_title
and $old_title ne ''){
#save old
@@ -942,7 +942,7 @@ sub load_article_for_live_scan{
$all_errors_of_this_article = '';
#print "result:".$new_live_article[$i]."\n";
}
-
+
# check new
if ($old_title eq $current_title) {
#double
@@ -958,11 +958,11 @@ sub load_article_for_live_scan{
$i = $i+1;
$new_live_article[$i] = $old_title."\t".$all_errors_of_this_article;
-
+
@live_article = @new_live_article;
$number_of_live_tests = @live_article;
}
- print "\t".$number_of_live_tests."\t".'all articles without double'."\n";
+ print "\t".$number_of_live_tests."\t".'all articles without double'."\n";
print LOGFILE 'articles without double'."\t".$number_of_live_tests."\n" if ($starter_modus ne 'starter');
@new_live_article = (); # free memory
@split_line = (); # free memory
@@ -970,15 +970,15 @@ sub load_article_for_live_scan{
# print LOGFILE $_."\n";
#}
#print LOGFILE 'END LIST'."\n\n";
-
+
if ($number_of_live_tests == 0) {
# if after this load in live_modus no article found, then end the scan
$quit_program = 'yes';
$quit_reason = $quit_reason. 'no article in scan list for live'."\n";
}
-
-
+
+
}
}
}
@@ -1002,14 +1002,14 @@ sub new_article{
my $limit = $_[0];
# oldest not scanned article
# select distinct title from cw_new where scan_live = 0 and project = 'dewiki' and daytime >= (select daytime from cw_new where scan_live = 0 and project = 'dewiki' order by daytime limit 1) order by daytime limit 250;
-
-
+
+
my $sql_text = "select distinct title from cw_new where scan_live = 0 and project = '".$project."' and daytime >= (select daytime from cw_new where scan_live = 0 and project = '".$project."' order by daytime limit 1) order by daytime limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1042,7 +1042,7 @@ sub new_article_old{
$new_counter ++;
}
}
- until (eof(INPUT_NEW) == 1);
+ until (eof(INPUT_NEW) == 1);
close (INPUT_NEW);
}
print "\t".$new_counter."\t".'articles new';
@@ -1059,14 +1059,14 @@ sub last_change_article{
my $limit = $_[0];
# oldest not scanned article
# select distinct title from cw_new where scan_live = 0 and project = 'dewiki' and daytime >= (select daytime from cw_new where scan_live = 0 and project = 'dewiki' order by daytime limit 1) order by daytime limit 250;
-
-
+
+
my $sql_text = "select distinct title from cw_change where scan_live = 0 and project = '".$project."' and daytime >= (select daytime from cw_change where scan_live = 0 and project = '".$project."' order by daytime limit 1) order by daytime limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1101,7 +1101,7 @@ sub last_change_article_old{
}
}
}
- until (eof(INPUT_NEW) == 1);
+ until (eof(INPUT_NEW) == 1);
close (INPUT_NEW);
}
print "\t".$change_counter."\t".'articles change';
@@ -1113,7 +1113,7 @@ sub last_change_article_old{
sub geo_error_article{
- # get all last_change article last days
+ # get all last_change article last days
# Load last change articles
my $file_geo = $project.'_'.$error_geo_list_filename;
my $file_input_geo = $output_geo.$project.'/'.$file_geo;
@@ -1135,7 +1135,7 @@ sub geo_error_article{
}
}
}
- until (eof(INPUT_GEO) == 1);
+ until (eof(INPUT_GEO) == 1);
close (INPUT_GEO);
}
print "\t".$geo_counter."\t".'articles geo';
@@ -1150,14 +1150,14 @@ sub article_with_error_from_dump_scan{
my $limit = 250;
# oldest not scanned article
# select distinct title from cw_new where scan_live = 0 and project = 'dewiki' and daytime >= (select daytime from cw_new where scan_live = 0 and project = 'dewiki' order by daytime limit 1) order by daytime limit 250;
-
-
+
+
my $sql_text = "select distinct title from cw_dumpscan where scan_live = 0 and project = '".$project."' limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1192,14 +1192,14 @@ sub article_with_error_from_dump_scan_old_old{
}
}
}
- until (eof(INPUT_DUMP) == 1);
+ until (eof(INPUT_DUMP) == 1);
close (INPUT_DUMP);
- # delete
- system ('rm '.$input_dump_errors);
+ # delete
+ system ('rm '.$input_dump_errors);
}
print "\t".$dump_counter."\t".'articles dump'."\n";
print LOGFILE 'articles dump:'."\t\t".$dump_counter."\n" if ($starter_modus ne 'starter');
-
+
}
}
@@ -1207,43 +1207,43 @@ sub article_with_error_from_dump_scan_old_old{
sub article_with_error_from_dump_scan_old{
my $database_dump_scan_counter = 0;
my $limit = 250; # number of articles per run
-
+
# get all error_id and create new sql_text
my $sql_text = " select error_id from (select * from cw_dumpscan where project = '".$project."' and scan_live = false ) a group by a.error_id limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
my $union_sql_text = '';
my $i = 0;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
$i = $i +1;
#print $result."\n";
- $union_sql_text .= "union all
- select title from
- (select *
- from cw_dumpscan
+ $union_sql_text .= "union all
+ select title from
+ (select *
+ from cw_dumpscan
where project = '".$project."'
and scan_live = false
and error_id = '".$result."'
limit ".$limit.") a".$i."
- ";
+ ";
}
$union_sql_text =~ s/^union all//;
$union_sql_text = $union_sql_text.';';
-
+
#print $union_sql_text."\n";
-
+
# use union_select, if one or more error found
if ($union_sql_text ne ';') {
-
+
$sth = $dbh->prepare( $union_sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1255,16 +1255,16 @@ sub article_with_error_from_dump_scan_old{
print "\t".$database_dump_scan_counter."\t".'articles from dump (not scan live) from db'."\n";
print LOGFILE 'articles from dump (not scan live) from db:'."\t\t".$database_dump_scan_counter."\n" if ($starter_modus ne 'starter');
}
-
+
sub get_done_article_from_database{
my $database_ok_counter = 0;
my $limit = $_[0];
my $sql_text = " select title from cw_error where ok = 1 and project = '".$project."' limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1282,9 +1282,9 @@ sub get_oldest_article_from_database{
my $sql_text = " select title from cw_error where project = '".$project."' and DATEDIFF(now(),found) > 31 order by DATEDIFF(now(),found) desc limit ".$limit.";";
my $result = '';
my $sth = $dbh->prepare( $sql_text );
- #print ''.$sql_text."\n";
+ #print ''.$sql_text."\n";
$sth->execute;
- while (my $arrayref = $sth->fetchrow_arrayref()) {
+ while (my $arrayref = $sth->fetchrow_arrayref()) {
foreach(@$arrayref) {
$result = $_;
}
@@ -1311,20 +1311,20 @@ sub scan_pages{
if ($dump_or_live eq 'dump' or $dump_or_live eq 'only') {
get_next_page_from_dump();
- } else {
+ } else {
get_next_page_from_live();
}
-
- if ( $end_of_dump eq 'no'
+
+ if ( $end_of_dump eq 'no'
and $end_of_live eq 'no'
and not ( $title =~ /\.js$/
or $title =~ /\.css$/
)
- )
+ )
{
check_article(); #Main check routine
} else {
- if ( $end_of_dump eq 'yes'
+ if ( $end_of_dump eq 'yes'
or $end_of_live eq 'yes' ) {
print 'articles scan finish'."\n\n" if ($silent_modus ne 'silent');
@@ -1333,14 +1333,14 @@ sub scan_pages{
}
}
}
- until ( $end_of_dump eq 'yes'
+ until ( $end_of_dump eq 'yes'
or $end_of_live eq 'yes'
#or $page_number > 20
#or $page_id > 7950
#or ($error_counter > 10000 and $project ne 'dewiki')
#or ($error_counter > 40000)
or ($error_counter > 40000 and $dump_or_live eq 'live')
- );
+ );
}
@@ -1353,10 +1353,10 @@ sub set_variables_for_article {
$text = ''; # text of the current article (for work)
$text_origin = ''; # text of the current article origin (for save)
$text_without_comments = ''; # text of the current article without_comments (for save)
-
+
$page_namespace = -100; # namespace of page
- $page_is_redirect = 'no';
+ $page_is_redirect = 'no';
$page_is_disambiguation = 'no';
$page_categories = '';
@@ -1368,33 +1368,33 @@ sub set_variables_for_article {
undef(@comments); # 0 pos_start
# 1 pos_end
# 2 comment
- $comment_counter = -1; #number of comments in this page
-
+ $comment_counter = -1; #number of comments in this page
+
undef(@category); # 0 pos_start
# 1 pos_end
- # 2 category Test
+ # 2 category Test
# 3 linkname Linkname
# 4 original [[Category:Test|Linkname]]
-
+
$category_counter = -1;
$category_all = ''; # all categries
undef(@interwiki); # 0 pos_start
# 1 pos_end
- # 2 interwiki Test
+ # 2 interwiki Test
# 3 linkname Linkname
# 4 original [[de:Test|Linkname]]
# 5 language
-
+
$interwiki_counter = -1;
- undef(@lines); # text seperated in lines
+ undef(@lines); # text seperated in lines
undef(@headlines); # headlines
undef(@section); # text between headlines
-
+
undef(@lines_first_blank); # all lines where the first character is ' '
-
+
undef(@templates_all); # all templates
undef(@template); # templates with values
# 0 number of template
@@ -1402,8 +1402,8 @@ sub set_variables_for_article {
# 2 template_row
# 3 attribut
# 4 value
- $number_of_template_parts = -1; # number of all template parts
-
+ $number_of_template_parts = -1; # number of all template parts
+
undef(@links_all); # all links
undef(@images_all); # all images
undef(@isbn); # all ibsn of books
@@ -1411,8 +1411,8 @@ sub set_variables_for_article {
$page_has_geo_error = 'no'; # yes/no geo error in this page
$page_geo_error_number = -1; # number of all article for this page
-
-
+
+
}
@@ -1427,30 +1427,30 @@ sub close_file {
sub update_table_cw_error_from_dump {
-
+
if ($dump_or_live eq 'dump') {
print 'move all article from cw_dumpscan into cw_error'."\n";
my $sql_text;
my $sth;
-
+
$sql_text = "delete from cw_error where project = '".$project."';";
$sth = $dbh->prepare( $sql_text );
$sth->execute;
-
-
+
+
#set @test = 'T%';
#insert into cw_error (select * from cw_dumpscan where project = 'nlwiki' and title like @test);
#delete from cw_dumpscan where project = 'nlwiki' and title like @test;
-
+
$sql_text = "insert into cw_error (select * from cw_dumpscan where project = '".$project."');";
$sth = $dbh->prepare( $sql_text );
- $sth->execute;
+ $sth->execute;
print 'delete all article from this project in cw_dumpscan'."\n";
$sql_text = "delete from cw_dumpscan where project = '".$project."';";
$sth = $dbh->prepare( $sql_text );
- $sth->execute;
+ $sth->execute;
}
}
@@ -1462,7 +1462,7 @@ sub delete_deleted_article_from_db {
#print $sql_text2."\n";
my $sth = $dbh->prepare( $sql_text2 );
$sth->execute;
-}
+}
sub delete_article_from_table_cw_new {
#delete all scanned or older then 7 days from this project
@@ -1470,8 +1470,8 @@ sub delete_article_from_table_cw_new {
#print $sql_text2."\n";
my $sth = $dbh->prepare( $sql_text2 );
$sth->execute;
-
- #delete all articles from don't scan projects
+
+ #delete all articles from don't scan projects
my $sql_text3 = "delete from cw_new where DATEDIFF(now(),daytime) > 8;";
#print $sql_text2."\n";
$sth = $dbh->prepare( $sql_text3 );
@@ -1484,8 +1484,8 @@ sub delete_article_from_table_cw_change {
#print $sql_text2."\n";
my $sth = $dbh->prepare( $sql_text2 );
$sth->execute;
-
- #delete all articles from don't scan projects
+
+ #delete all articles from don't scan projects
my $sql_text3 = "delete from cw_change where DATEDIFF(now(),daytime) > 8;";
$sth = $dbh->prepare( $sql_text3 );
$sth->execute;
@@ -1494,7 +1494,7 @@ sub delete_article_from_table_cw_change {
sub update_table_cw_starter {
if ($starter_modus eq 'starter') {
- print 'update_table_cw_starter'."\n" if ($silent_modus ne 'silent');
+ print 'update_table_cw_starter'."\n" if ($silent_modus ne 'silent');
#print "\t".$error_counter."\t".'errors found'."\n";
if ($error_counter > 0) {
#print '$page_number= '.$page_number."\n";
@@ -1515,7 +1515,7 @@ sub update_table_cw_starter {
$sth = $dbh->prepare( $sql_text);
$sth->execute;
-
+
if ($load_modus_new ne 'yes' and $load_modus_last_change ne 'yes') {
# was something change?
$sql_text = "update cw_starter set last_run_change = 'true' where project ='".$project."';";
@@ -1524,7 +1524,7 @@ sub update_table_cw_starter {
$sth->execute;
}
}
-
+
}
}
@@ -1535,13 +1535,13 @@ sub read_and_write_metadata_from_dump {
# read the metadata from dump (…)
# write this metadata in file for dump and live-scan
#print 'Read metadata from dump and write in file'."\n";
-
+
#old from dump
# my $line ='';
# my $end = 'no';
my $metadata = '';
# do {
- # $line_number = $line_number + 1;
+ # $line_number = $line_number + 1;
# $line = ;
# #print $line_number.' '.$line;
# $line =~ s/\n//;
@@ -1550,16 +1550,16 @@ sub read_and_write_metadata_from_dump {
# $end = 'yes';
# }
#
- # }
+ # }
# until ( $end eq 'yes');
-
+
#new from web
# raw_text2
-
+
#print 'get Metadaten from :'.$project.' '.$language."\n";
$language = 'nds-nl' if ($project eq 'nds_nlwiki');
-
-
+
+
my $url = 'http://'.$language.'.wikipedia.org/w/api.php';
if ($project eq 'commonswiki') {
$url = 'http://commons.wikimedia.org/w/api.php';
@@ -1568,20 +1568,20 @@ sub read_and_write_metadata_from_dump {
$url = 'http://'.$language.'.wikisource.org/w/api.php';
}
$url = $url.'?action=query&meta=siteinfo&siprop=general|namespaces|namespacealiases|statistics|magicwords&format=xml';
-
+
$metadata = raw_text2($url);
$language = 'nds_nl' if ($project eq 'nds_nlwiki');
-
-
-
+
+
+
my $file_metadata = $output_directory.$project.'/'.$project.'_metadata.txt';
print $file_metadata."\n";
open(METADATA, ">$file_metadata");
print METADATA $metadata;
close(METADATA);
$metadata = '';
-
-
+
+
}
sub load_metadata_from_file {
@@ -1592,24 +1592,24 @@ sub load_metadata_from_file {
open(METADATA, "<$file_metadata");
my @metadata = ;
close(METADATA);
-
- my $metatext = '';
+
+ my $metatext = '';
foreach (@metadata) {
$metatext = $metatext.$_;
}
#print $metatext."\n";
#Extract metadata
-
- #sitename
+
+ #sitename
my $sitename = '';
my $pos1 = index($metatext,'sitename="') + length('sitename="');
my $pos2 = index($metatext,'"', $pos1);
$sitename = substr($metatext, $pos1, $pos2 - $pos1);
print 'Sitename: '."\t\t".$sitename."\n" if ($silent_modus ne 'silent');
-
-
+
+
#get base
$base = '';
$pos1 = index($metatext,'base="') + length('base="');
@@ -1619,8 +1619,8 @@ sub load_metadata_from_file {
$home = $base;
$home =~ s/[^\/]+$//;
#print 'Home: '."\t\t".$home."\n";
-
-
+
+
#get namespaces number and name
# for example: 6 Tabulator image
@@ -1628,18 +1628,18 @@ sub load_metadata_from_file {
$pos1 = index($metatext,'') + length('');
$pos2 = index($metatext,'', $pos1);
$namespaces = substr($metatext, $pos1, $pos2 -$pos1);
- #print "x".$namespaces."x\n";
+ #print "x".$namespaces."x\n";
#$namespaces =~ s/^\n//g;
$namespaces =~ s/<\/ns>/\n/g;
$namespaces =~ s/\/>/>\n/g; # only namespace 0 - articles
-
+
# now every namespase in one line
- #print "x".$namespaces."x\n";
-
+ #print "x".$namespaces."x\n";
+
$namespaces =~ s/ case="first-letter"//g;
$namespaces =~ s/ xml:space="preserve"//g;
$namespaces =~ s/ subpages=""//g;
-
+
#$namespaces =~ s/Spezial
-
+
#get id
my $pos1 = index($namespaces_split[$i],'id="') + length('id="');
my $pos2 = index($namespaces_split[$i],'"', $pos1);
my $id = substr($namespaces_split[$i], $pos1, $pos2 -$pos1);
-
-
+
+
#get canonical namspace name
$pos1 = index($namespaces_split[$i],'canonical="') + length('canonical="');
$pos2 = index($namespaces_split[$i],'"', $pos1);
my $canonical = substr($namespaces_split[$i], $pos1, $pos2 -$pos1);
-
+
#get namespace name
$pos1 = index($namespaces_split[$i],'>') + length('>');
my $name = substr($namespaces_split[$i], $pos1);
-
-
+
+
$namespaces_split[$i] = $id."\t".$canonical."\t".$name;
#print $namespaces_split[$i]."\n";
-
+
my @splitter = split( /\t/, $namespaces_split[$i]);
if ( $namespaces_split[$i] =~ /^0/) {
$namespace[$i][0] = 0;
@@ -1689,11 +1689,11 @@ sub load_metadata_from_file {
$namespace[$i][2] = $splitter[1];
$namespace[$i][2] = '' if ($namespace[$i][0] == 0);
-
+
if ($namespace[$i][0] == 6) {
# image
$namespace_image[0] = $namespace[$i][1];
- $namespace_image[1] = $namespace[$i][2];
+ $namespace_image[1] = $namespace[$i][2];
}
if ($namespace[$i][0] == 10) {
# templates
@@ -1707,25 +1707,25 @@ sub load_metadata_from_file {
}
#print $i."\t".$namespace[$i][0]."\t".$namespace[$i][1]."\t".$namespace[$i][1]."\n\n"
}
-
-
-
+
+
+
# namespacealiases
-
+
my $namespacealiases_text = '';
$pos1 = index($metatext,'') + length('');
$pos2 = index($metatext,'', $pos1);
- $namespacealiases_text = substr($metatext, $pos1, $pos2 -$pos1);
+ $namespacealiases_text = substr($metatext, $pos1, $pos2 -$pos1);
#print $namespacealiases_text. "\n";
$namespacealiases_text =~ s/<\/ns>/\n/g;
$namespacealiases_text =~ s//\t/g;
#print $namespacealiases_text. "\n";
-
+
my @namespacealiases_split = split( /\n/, $namespacealiases_text);
$namespacealiases_count = @namespacealiases_split;
-
+
#print $namespaces_count;
for (my $i = 0; $i < $namespacealiases_count; $i++) {
my @splitter = split( /\t/, $namespacealiases_split[$i]);
@@ -1741,13 +1741,13 @@ sub load_metadata_from_file {
#aliasname for category
push(@namespace_cat, $splitter[1]);
}
-
+
#save all aliases
$namespacealiases[$i][0] = $splitter[0];
$namespacealiases[$i][1] = $splitter[1];
#print 'Namespacealiases: '.$namespacealiases[$i][0].','.$namespacealiases[$i][1]."\n";
}
-
+
#foreach (@namespace_image) {
# print $_."\n";
#}
@@ -1756,7 +1756,7 @@ sub load_metadata_from_file {
# print $_."\n";
#}
- #magicwords
+ #magicwords
@magicword_defaultsort = get_magicword($metatext, 'defaultsort');
@magicword_img_thumbnail = get_magicword($metatext, 'img_thumbnail');
@@ -1781,20 +1781,20 @@ sub load_metadata_from_file {
@magicword_img_middle = get_magicword($metatext, 'img_middle');
@magicword_img_bottom = get_magicword($metatext, 'img_bottom');
@magicword_img_text_bottom = get_magicword($metatext, 'img_text_bottom');
-
-
+
+
#foreach (@magicword_defaultsort) {
# print $_."\n";
#}
-
+
}
sub get_magicword {
my $metatext = $_[0];
my $key = $_[1];
my @result;
-
+
my $pos1 = index( $metatext, '
my $revision_start = 0; # find
-
+
#loop for every line
do {
$line = ;
$line_number = $line_number +1;
#$number_of_scan_line = $number_of_scan_line +1; #Security, maybe the finish is not correct
#print "$line";
-
+
if ($line =~ //) {
$start_recording = 1;
}
-
+
if ($start_recording == 1) {
$text = $text.$line;
}
@@ -1845,7 +1845,7 @@ sub get_next_page_from_dump{
$start_recording = 0;
$article_complete = 1;
}
-
+
if ($line =~ //) {
#extract title
$title ="$line";
@@ -1862,7 +1862,7 @@ sub get_next_page_from_dump{
@content= split(/,$content[1]);
$page_id = $content[0];
#print "$page_id\t$title\n";
- }
+ }
if ($line =~ //) {
$revision_start = 1;
@@ -1883,11 +1883,11 @@ sub get_next_page_from_dump{
@content= split(/,$content[1]);
$revision_time=$content[0];
#print $revision_time,"\n";
- }
-
+ }
+
$end_of_dump = 'yes' if ($line =~ /<\/mediawiki>/);
$end_of_dump = 'yes' if (eof(DUMP) == 1);
-
+
}
until ( $article_complete == 1 or $end_of_dump eq 'yes');
#Extract only edit-text
@@ -1896,18 +1896,18 @@ sub get_next_page_from_dump{
$text =~ s///g;
$test = index($text, '');
$text = substr($text,0,$test);
-
+
$text = replace_special_letters($text);
-
+
#if ( $title eq 'At-Tabarī'
# or $title eq 'Rumänien'
# or $title eq 'Liste der Ortsteile im Saarland') {
-
+
# my $output_article_text_file = $output_directory.$project.'/'.$project.'_text_article_'.$title.'.txt';
# open(OUTPUT_ARTICLE_TEXT, ">$output_article_text_file");
# print OUTPUT_ARTICLE_TEXT $text;
# close(OUTPUT_ARTICLE_TEXT);
-
+
#}
#print $text;
}
@@ -1917,7 +1917,7 @@ sub get_next_page_from_live {
if ( $current_live_error_scan != 0 ) {
# Error not 0 (new aricles, and last changes...)
-
+
if ($current_live_error_scan != 0 and $current_live_article == $maximum_current_error_scan) {
# set number higher if not all 50 errors found
#print 'Nr.'.$current_live_error_scan."\n";
@@ -1929,28 +1929,28 @@ sub get_next_page_from_live {
# set higer maximum
$maximum_current_error_scan = $maximum_current_error_scan + ($max_error_count - $error_description[$current_live_error_scan][3]);
#print 'Set higher maximum: '.$maximum_current_error_scan."\n";
- } else {
+ } else {
# stop scan
- save_errors_for_next_scan($current_live_article);
+ save_errors_for_next_scan($current_live_article);
#$rest_of_errors_not_scan_yet
$current_live_article = -1;
}
}
-
+
# find next error with articles
- if (($current_live_error_scan > 0 and $current_live_article == -1)
+ if (($current_live_error_scan > 0 and $current_live_article == -1)
or $current_live_article == $number_article_live_to_scan
or $current_live_error_scan == -1) {
#print 'switch from error to error'."\n";
-
+
$current_live_error_scan = 0 if ($current_live_error_scan == -1); #start with error 1
-
+
do {
$current_live_error_scan ++;
#print $current_live_error_scan."\n";
@live_to_scan = ();
if ($error_description[$current_live_error_scan][3] < $max_error_count) {
- # only if not all found with new/change/last
+ # only if not all found with new/change/last
get_all_error_with_number($current_live_error_scan);
} else {
# if with new /change etc. we found for this error much
@@ -1958,11 +1958,11 @@ sub get_next_page_from_live {
save_errors_for_next_scan(0);
@live_to_scan = ();
}
-
+
$number_article_live_to_scan = @live_to_scan;
} until ($current_live_error_scan >= $number_of_error_description
or $number_article_live_to_scan > 0);
-
+
$maximum_current_error_scan = $max_error_count;
if ($error_description[$current_live_error_scan][3] > 0) {
#print 'More errors for error'.$current_live_error_scan."\n";
@@ -1976,18 +1976,18 @@ sub get_next_page_from_live {
#print 'Error '.$current_live_error_scan.' :'."\t".$number_article_live_to_scan."\n" if ($number_article_live_to_scan > 0);
#print 'Max='.$maximum_current_error_scan."\n";
#print 'Available = '.$number_article_live_to_scan."\n";
-
+
}
}
-
+
if ( $current_live_error_scan == 0
and $current_live_article >= $number_article_live_to_scan ) {
# end of live, no more article to scan
- $end_of_live = 'yes';
+ $end_of_live = 'yes';
}
-
+
if ($current_live_error_scan >= $number_of_error_description) {
# after check live all errors, then start with check of error 0 (new articles, last changes, ...)
$current_live_article = 0;
@@ -1996,22 +1996,22 @@ sub get_next_page_from_live {
get_all_error_with_number($current_live_error_scan);
$number_article_live_to_scan = @live_to_scan;
#print 'Error 0 :'."\t".$number_article_live_to_scan."\n";
- $maximum_current_error_scan = $max_error_count;
- }
-
+ $maximum_current_error_scan = $max_error_count;
+ }
+
#$number_article_live_to_scan = @live_to_scan;
- if ( $current_live_article < $number_article_live_to_scan
+ if ( $current_live_article < $number_article_live_to_scan
and $number_article_live_to_scan > 0
and $end_of_live ne 'yes' ) {
# there is an error with articles
- # now we get the next article
+ # now we get the next article
if ($xml_text_from_api eq '') {
# if list of xml_text_from_api is empty, then load next ariticles
- #print 'Load next texts from API'."\n";
+ #print 'Load next texts from API'."\n";
my $many_titles = '';
- my $i = $current_live_article;
+ my $i = $current_live_article;
my $end_many_title = 'false';
do {
@@ -2025,8 +2025,8 @@ sub get_next_page_from_live {
$i++;
$end_many_title = 'true' if ($i == $number_article_live_to_scan);
$end_many_title = 'true' if ($i == $current_live_article + 25); # not more then 25 articles
- $end_many_title = 'true' if ( length($many_titles) > 2000); # url length not too long (Problem ruwiki and other no latin letters )
- }
+ $end_many_title = 'true' if ( length($many_titles) > 2000); # url length not too long (Problem ruwiki and other no latin letters )
+ }
until ($end_many_title eq 'true');
#print 'Many titles ='.$many_titles."\n";
$xml_text_from_api = raw_text_more_articles( $many_titles );
@@ -2036,17 +2036,17 @@ sub get_next_page_from_live {
$xml_text_from_api =~ s/^//;
$xml_text_from_api =~ s/<\/api>$//;
$xml_text_from_api =~ s/<\/query>$//;
- $xml_text_from_api =~ s/<\/pages>$//;
+ $xml_text_from_api =~ s/<\/pages>$//;
#print $xml_text_from_api."\n";
}
-
-
-
+
+
+
# get next title and text from xml_text_from_api
if ($xml_text_from_api ne '') {
-
+
my $pos_end = index ($xml_text_from_api, '' );
if ($pos_end > -1 ) {
# normal page
@@ -2060,7 +2060,7 @@ sub get_next_page_from_live {
$text = substr ( $xml_text_from_api, 0, $pos_end + length('missing="" />') );;
$xml_text_from_api = substr ( $xml_text_from_api, $pos_end + length('missing="" />') );
if ($pos_end == -1){
- #BIG PROBLEM
+ #BIG PROBLEM
print 'WARNING: Big problem with API'."\n";
print LOGFILE 'WARNING: Big problem with API'."\n" if ($starter_modus ne 'starter');
$text = '';
@@ -2071,12 +2071,12 @@ sub get_next_page_from_live {
my $line = $live_to_scan[$current_live_article];
my @line_split = split( /\t/, $line);
$title = $line_split[0];
-
+
#print $title ."\n";
#print substr ( $text, 0, 150)."\n";
-
+
if (index ( $text, 'title='.'"'.$title.'"') == -1 ) {
- # the result from the api is in a other sort
+ # the result from the api is in a other sort
# know get the current title
# for example
#print "Old title:".$title ."\n";
@@ -2091,23 +2091,23 @@ sub get_next_page_from_live {
#print "\n\n";
}
-
-
+
+
#print $title."\n";
push(@article_was_scanned, $title);
-
+
# get id
my $test_id_pos = index ($text, 'pageid="');
- if ($test_id_pos > -1) {
+ if ($test_id_pos > -1) {
$page_id = substr($text, $test_id_pos + length( 'pageid="') );
$test_id_pos = index ($page_id , '"');
$page_id = substr($page_id, 0, $test_id_pos);
#print $page_id.' - '.$title."\n";
}
-
-
+
+
# get text
my $test = index ($text, '//g;
$test = index($text,'');
- $text = substr($text,0,$test);
+ $text = substr($text,0,$test);
}
-
+
#revision_id
#revision_time
@@ -2125,7 +2125,7 @@ sub get_next_page_from_live {
#print substr($text, 0, 60)."\n";
$text = replace_special_letters($text);
}
- }
+ }
}
sub save_errors_for_next_scan {
@@ -2133,7 +2133,7 @@ sub save_errors_for_next_scan {
$number_article_live_to_scan = @live_to_scan;
for (my $i = $from_number; $i < $number_article_live_to_scan; $i++) {
#print $live_to_scan[$i]."\n";
-
+
my $line = $live_to_scan[$i];
#print '1:'.$line."\n";
my @line_split = split( /\t/, $line);
@@ -2169,7 +2169,7 @@ sub get_all_error_with_number {
if ($found eq 'yes') {
# article has error X
#print 'found '.$current_live_line."\n" if ($error_live == 7);
-
+
# was this article scanned today ?
$found = 'no';
my $number_of_scanned_articles = @article_was_scanned;
@@ -2208,7 +2208,7 @@ sub get_all_error_with_type {
-sub replace_special_letters {
+sub replace_special_letters {
my $content = $_[0];
# only in dump must replace not in live
# http://de.wikipedia.org/w/index.php?title=Benutzer_Diskussion:Stefan_K%C3%BChn&oldid=48573921#Dump
@@ -2227,7 +2227,7 @@ sub replace_special_letters {
sub raw_text {
my $title = $_[0];
-
+
$title =~ s/&/%26/g; # Problem with & in title
$title =~ s/'/'/g; # Problem with apostroph in title
$title =~ s/</content;
my $result2 = '';
$result2 = $content2 if ($content2) ;
-
+
return($result2);
}
sub raw_text2 {
my $url = $_[0];
-
+
$url =~ s/&/%26/g; # Problem with & in title
$url =~ s/'/'/g; # Problem with apostroph in title
-
+
my $response2 ;
uri_escape($url);
my $ua2 = LWP::UserAgent->new;
$response2 = $ua2->get( $url );
-
+
my $content2 = $response2->content;
my $result2 = '';
$result2 = $content2 if ($content2) ;
@@ -2282,7 +2282,7 @@ sub raw_text2 {
sub raw_text_more_articles {
my $title = $_[0];
-
+
#$title =~ s/&/%26/g; # Problem with & in title
#$title =~ s/'/'/g; # Problem with apostroph in title
#$title =~ s/</new;
@@ -2311,7 +2311,7 @@ sub raw_text_more_articles {
sub load_text_translation{
print 'Load tanslation of:'."\t".$project."\n" if ($silent_modus ne 'silent');
-
+
# Input of translation page
$translation_page = 'Wikipedia:WikiProject Check Wikipedia/Translation' if ($project eq 'afwiki') ;
@@ -2352,7 +2352,7 @@ sub load_text_translation{
$translation_page = 'װיקיפּעדיע:קאנטראלירן_בלעטער/Translation' if ($project eq 'yiwiki') ;
$translation_page = '维基百科:错误检查专题/翻译' if ($project eq 'zhwiki') ;
-
+
my $translation_input = raw_text($translation_page);
$translation_input = replace_special_letters($translation_input);
#print $translation_input."\n";
@@ -2370,19 +2370,19 @@ sub load_text_translation{
# category_text
$input_text = get_translation_text($translation_input, 'category_001=', 'END' );
$category_text = $input_text if ($input_text ne '');
-
+
# priority
$input_text = get_translation_text($translation_input, 'top_priority_'.$project.'=', 'END' );
- $top_priority_project = $input_text if ($input_text ne '');
+ $top_priority_project = $input_text if ($input_text ne '');
$input_text = get_translation_text($translation_input, 'middle_priority_'.$project.'=', 'END' );
$middle_priority_project = $input_text if ($input_text ne '');
$input_text = get_translation_text($translation_input, 'lowest_priority_'.$project.'=', 'END' );
- $lowest_priority_project = $input_text if ($input_text ne '');
-
+ $lowest_priority_project = $input_text if ($input_text ne '');
+
+
-
# find error description
for (my $i = 1; $i < $number_of_error_description; $i++) {
my $current_error_number = 'error_';
@@ -2390,7 +2390,7 @@ sub load_text_translation{
$current_error_number = $current_error_number.'0' if ($i < 100);
$current_error_number = $current_error_number.$i;
#print $i, $current_error_number."\n";
-
+
# Priority
$error_description[$i][4] = get_translation_text($translation_input, $current_error_number.'_prio_'.$project.'=', 'END');
#print "x".$error_description[$i][4]."x"."\n";
@@ -2406,13 +2406,13 @@ sub load_text_translation{
$error_description[$i][4] = $error_description[$i][0];
}
#print $i."\t".$error_description[$i][0]."\t".$error_description[$i][4]."\n";
-
+
$error_description[$i][5] = get_translation_text($translation_input, $current_error_number.'_head_'.$project.'=', 'END');
$error_description[$i][6] = get_translation_text($translation_input, $current_error_number.'_desc_'.$project.'=', 'END');
#$error_description[$i][9] = get_translation_text_XHTML($error_description[$i][5]); # don't work
#$error_description[$i][10] = get_translation_text_XHTML($error_description[$i][6]); # don't work
}
-
+
}
sub get_translation_text {
@@ -2434,12 +2434,12 @@ sub get_translation_text {
sub get_translation_text_XHTML{
# don't work today
-
+
# use Wikipedia-API to get XHTML from Wikitext
# http://www.mediawiki.org/wiki/API:Parsing_wikitext#parse
# http://en.wikipedia.org/w/api.php?action=parse&text=%5B%5Bfoo%5D%5D%20%5B%5BAPI:Query|bar%5D%5D%20%5Bhttp://www.example.com/%20baz%5D
-
-
+
+
my $translation_text = $_[0];
my $xhtml_text = '';
print 'Translation='.$translation_text."\n";
@@ -2448,20 +2448,20 @@ sub get_translation_text_XHTML{
$url = $home;
$url =~ s/\/wiki\//\/w\//;
$url = $url.'api.php?action=parse&text='.$translation_text;
-
+
print 'URL='.$url."\n";
my $response ;
my $ua = LWP::UserAgent->new;
$response = $ua->get( $url );
my $content = $response->content;
$xhtml_text = $content if ($content) ;
-
+
# only text, delete all other
- my $pos = index($xhtml_text, 'text xml:space=');
+ my $pos = index($xhtml_text, 'text xml:space=');
$xhtml_text = substr ($xhtml_text ,$pos);
- $pos = index($xhtml_text, '')+length('');
+ $pos = index($xhtml_text, '')+length('');
$xhtml_text = substr ($xhtml_text ,$pos);
- $pos = index($xhtml_text, '></text>');
+ $pos = index($xhtml_text, '></text>');
$xhtml_text = substr ($xhtml_text ,0, $pos);
$pos = index($xhtml_text, '/g;
#$xhtml_text =~ s/"/"/g;
#$xhtml_text =~ s/'/'/g;
-
-
- }
+
+
+ }
print 'XHTML='.$xhtml_text ."\n";
return ($xhtml_text);
}
-
+
sub output_errors_desc_in_db{
if ($load_modus_done eq 'yes' and $dump_or_live eq 'live') {
@@ -2507,7 +2507,7 @@ sub output_errors_desc_in_db{
# | text_html_trans | varchar(4000) | YES | | NULL | |
# +-----------------+---------------+------+-----+---------+-------+
-
+
for (my $i = 1; $i < $number_of_error_description; $i++) {
@@ -2521,17 +2521,17 @@ sub output_errors_desc_in_db{
my $sql_desc_trans = $error_description[$i][6];
$sql_desc_trans =~ s/'/\\'/g;
$sql_desc = substr( $sql_desc_trans, 0, 3999); # max 4000
-
-
-
+
+
+
# insert or update error
- my $sql_text2 = "update cw_error_desc
- set prio=".$error_description[$i][4].",
+ my $sql_text2 = "update cw_error_desc
+ set prio=".$error_description[$i][4].",
name='".$sql_headline."' ,
text='".$sql_desc."',
name_trans='".$sql_headline_trans."' ,
- text_trans='".$sql_desc_trans."'
- where id = ". $i."
+ text_trans='".$sql_desc_trans."'
+ where id = ". $i."
and project = '". $project."'
;";
#print $sql_text2."\n" if ($i == 18 or $i ==67 or $i ==91);
@@ -2541,35 +2541,35 @@ sub output_errors_desc_in_db{
#print 'Update '.$x.' rows'."\n";
} else {
print 'new error - description insert into db'."\n";
- $sql_text2 = "insert into cw_error_desc (project, id, prio, name, text, name_trans, text_trans)
+ $sql_text2 = "insert into cw_error_desc (project, id, prio, name, text, name_trans, text_trans)
values ('". $project."', ". $i.", ".$error_description[$i][4].", '".$sql_headline."' ,'".$sql_desc."',
- '".$sql_headline_trans."' ,'".$sql_desc_trans."' );";
- # print $sql_text2."\n";
+ '".$sql_headline_trans."' ,'".$sql_desc_trans."' );";
+ # print $sql_text2."\n";
$sth = $dbh->prepare( $sql_text2 );
$sth->execute;
-
+
}
-
-
- }
+
+
+ }
}
}
-sub output_text_translation_wiki{
+sub output_text_translation_wiki{
# Output of translation-file
my $filename = $output_directory.$project.'/'.$project.'_'.$translation_file;
print 'Output translation:'."\t".$project.'_'.$translation_file."\n" if ($silent_modus ne 'silent');
-
+
open(TRANSLATION, ">$filename");
-
+
#######################################
print TRANSLATION '
'."\n";
print TRANSLATION ' new translation text under http://toolserver.org/~sk/checkwiki/'.$project.'/'. " (updated daily) \n";
-
+
print TRANSLATION '#########################'."\n";
print TRANSLATION '# metadata'."\n";
print TRANSLATION '#########################'."\n";
-
+
print TRANSLATION ' project='.$project." END\n";
print TRANSLATION ' category_001='.$category_text." END #for example: [[Category:Wikipedia]] \n";
print TRANSLATION "\n";
@@ -2579,13 +2579,13 @@ sub output_text_translation_wiki{
print TRANSLATION '#########################'."\n";
print TRANSLATION "\n";
print TRANSLATION ' start_text_'.$project.'='.$start_text." END\n";
-
+
print TRANSLATION '#########################'."\n";
print TRANSLATION '# description'."\n";
print TRANSLATION '#########################'."\n";
- print TRANSLATION "\n";
+ print TRANSLATION "\n";
print TRANSLATION ' description_text_'.$project.'='.$description_text." END\n";
-
+
print TRANSLATION '#########################'."\n";
print TRANSLATION '# priority'."\n";
print TRANSLATION '#########################'."\n";
@@ -2599,9 +2599,9 @@ sub output_text_translation_wiki{
print TRANSLATION ' lowest_priority_'.$project.'='.$lowest_priority_project." END\n";
print TRANSLATION "\n";
print TRANSLATION " Please only translate the variables with …_".$project." at the end of the name. Not …_script= .\n";
-
-
+
+
########################################
#my $number_of_error_description = 1;
#while ($error_description[$number_of_error_description][1] ne '') {
@@ -2609,7 +2609,7 @@ sub output_text_translation_wiki{
# $number_of_error_description = $number_of_error_description + 1;
#}
#until ($error_description[$number_of_error_description][1] ne ''); # english Headline existed
-
+
print 'error description:'."\t".$number_of_error_description." (-1) \n" if ($silent_modus ne 'silent');
print TRANSLATION '#########################'."\n";
print TRANSLATION '# error description'."\n";
@@ -2620,10 +2620,10 @@ sub output_text_translation_wiki{
print TRANSLATION '# prio = 2 (middle priority)'."\n";
print TRANSLATION '# prio = 3 (lowest priority)'."\n";
print TRANSLATION "\n";
-
-
+
+
for (my $i = 1; $i < $number_of_error_description; $i++) {
-
+
my $current_error_number = 'error_';
$current_error_number = $current_error_number.'0' if ($i < 10);
$current_error_number = $current_error_number.'0'.$i if ($i < 100);
@@ -2636,11 +2636,11 @@ sub output_text_translation_wiki{
print TRANSLATION "\n";
print TRANSLATION '###########################################################################'."\n";
print TRANSLATION "\n";
- }
-
+ }
+
print TRANSLATION '
'."\n";
- close(TRANSLATION);
-
+ close(TRANSLATION);
+
}
sub output_little_statistic{
@@ -2653,7 +2653,7 @@ sub output_duration {
my $duration = $time_end - $time_start;
my $duration_minutes = int($duration / 60);
my $duration_secounds = int(((int(100 * ($duration / 60)) / 100)-$duration_minutes)*60);
-
+
print 'Duration:'."\t\t".$duration_minutes.' minutes '.$duration_secounds.' secounds'."\n";
print $project.' '.$dump_or_live."\n" if ($silent_modus ne 'silent');
}
@@ -2661,7 +2661,7 @@ sub output_duration {
#############################################################################
sub check_article{
-
+
my $steps = 500;
$steps = 1 if ($dump_or_live eq 'live');
$steps = 5000 if ($silent_modus eq 'silent');
@@ -2672,7 +2672,7 @@ sub check_article{
or $title eq 'Liste der Ortsteile im Saarland') {
# $details_for_page = 'yes';
}
-
+
my $text_for_tests = "Hallo
Barnaby, Wendy. The Plague Makers: The Secret World of Biological Warfare, Frog Ltd, 1999.
in en [[Japanese war crimes]]
@@ -2808,16 +2808,16 @@ sub check_article{
===== PPM, PGM, PBM, PNM =====
" .'test –uberlappung3456Ende des Text';
-
+
# $text = $text_for_tests;
-
+
get_namespace();
print_article_title_every_x( $steps );
delete_old_errors_in_db();
-
+
get_comments_nowiki_pre();
-
- get_math();
+
+ get_math();
get_source();
get_code();
get_syntaxhighlight();
@@ -2829,24 +2829,24 @@ sub check_article{
get_gallery();
get_hiero(); #problem with <-- and --> (error 056)
get_ref();
-
+
check_for_redirect();
get_categories();
get_interwikis();
-
+
create_line_array();
get_line_first_blank();
get_headlines();
-
+
error_check();
-
-
+
+
#get_coordinates() if (-e $file_module_coordinate) ;
#get_persondata();
-
+
set_article_as_scan_live_in_db($title, $page_id) if ($dump_or_live eq 'live');
-
-
+
+
}
sub print_article_title_every_x{
@@ -2860,7 +2860,7 @@ sub print_article_title_every_x{
$project_output =~ s/wiki//;
$counter_output .= $project_output.' ';
$counter_output .= 'p='.$page_number.' ';
-
+
if ($dump_or_live eq 'live') {
my $output_current_live_article = $current_live_article + 1;
$counter_output .= $current_live_error_scan.'/'.$output_current_live_article.'/'.$number_article_live_to_scan;
@@ -2872,14 +2872,14 @@ sub print_article_title_every_x{
}
print LOGFILE $counter_output if ($starter_modus ne 'starter');
-
+
}
sub delete_old_errors_in_db{
# delete article in database
#print $page_id."\t".$title."\n";
- if ( $dump_or_live eq 'live'
- and $page_id
+ if ( $dump_or_live eq 'live'
+ and $page_id
and $title ne '' ) {
my $sql_text = "delete from cw_error where error_id = ". $page_id." and project = '". $project."';";
#print $sql_text."\n\n";
@@ -2894,24 +2894,24 @@ sub get_namespace{
if ( index( $title, ':' ) > -1) {
#print 'Get namespace for: '.$title."\n";
for (my $i = 0; $i < $namespaces_count; $i++) {
- #print $i." ".$namespace[$i][0]." ".$namespace[$i][1]." ".$namespace[$i][2] ."\n" ;#if ($title eq 'Sjabloon:Gemeente');
+ #print $i." ".$namespace[$i][0]." ".$namespace[$i][1]." ".$namespace[$i][2] ."\n" ;#if ($title eq 'Sjabloon:Gemeente');
$page_namespace = $namespace[$i][0] if ( index ($title, $namespace[$i][1].':') == 0);
$page_namespace = $namespace[$i][0] if ( index ($title, $namespace[$i][2].':') == 0);
}
-
+
#print $page_namespace."\n" ;#if ($title eq 'Sjabloon:Gemeente');
#print $namespacealiases_count."\n";
for (my $i = 0; $i < $namespacealiases_count; $i++) {
- #print $i." ".$namespacealiases[$i][0]." ".$namespacealiases[$i][1] ."\n" ;#if ($title eq 'Sjabloon:Gemeente');
+ #print $i." ".$namespacealiases[$i][0]." ".$namespacealiases[$i][1] ."\n" ;#if ($title eq 'Sjabloon:Gemeente');
$page_namespace = $namespacealiases[$i][0] if ( index ($title, $namespacealiases[$i][1].':') == 0);
- }
- #print $page_namespace."\n" ;#if ($title eq 'Sjabloon:Gemeente');
+ }
+ #print $page_namespace."\n" ;#if ($title eq 'Sjabloon:Gemeente');
$page_namespace = 0 if ($page_namespace == -100);
} else {
$page_namespace = 0;
}
-
+
}
@@ -2924,14 +2924,14 @@ sub get_comments_nowiki_pre{
my $pos_first = -1;
my $loop_again = 0;
do {
-
+
# next tag
$pos_comment = index ($text, '', $pos_comment + length('
@@ -2964,7 +2964,7 @@ sub get_comments_nowiki_pre{
error_005_Comment_no_correct_end ('check', $text_output );
#print $text_output."\n";
}
-
+
#nowiki
if ($tag_first eq 'nowiki' and $pos_nowiki_end > -1) {
# found and
@@ -2980,8 +2980,8 @@ sub get_comments_nowiki_pre{
my $text_output = substr( $text,$pos_nowiki);
$text_output = text_reduce($text_output, 80);
error_023_nowiki_no_correct_end('check', $text_output );
- }
-
+ }
+
#pre
if ($tag_first eq 'pre' and $pos_pre_end > -1) {
# found
and
@@ -2998,21 +2998,21 @@ sub get_comments_nowiki_pre{
my $text_output = substr( $text,$pos_pre);
$text_output = text_reduce($text_output, 80);
error_024_pre_no_correct_end ('check', $text_output);
- }
-
- #end
+ }
+
+ #end
if ($pos_comment == -1
- and $pos_nowiki == -1
+ and $pos_nowiki == -1
and $pos_pre == -1) {
- # found no ', $pos_start + length('');
@@ -3080,7 +3080,7 @@ sub get_next_comment{
$comments[$comment_counter][1] = $pos_end;
$comments[$comment_counter][2] = substr($text, $pos_start, $pos_end - $pos_start );
#print $comments[$comment_counter][2]."\n";
-
+
#replace comment with space
my $text_before = substr( $text, 0, $pos_start );
my $text_after = substr( $text, $pos_end );
@@ -3110,33 +3110,33 @@ sub get_math {
my $pos_start2 = index ( lc($text), '' , $pos_start + length('');
- #print substr($text, $pos_start, $pos_end - $pos_start )."\n";
+ #print substr($text, $pos_start, $pos_end - $pos_start )."\n";
$end_search = 'no';
$pos_start_old = $pos_end;
@@ -3149,15 +3149,15 @@ sub get_math {
$filler = $filler.' ';
}
$text = $text_before.$filler.$text_after;
- }
+ }
if ($pos_start > -1 and $pos_end == -1) {
error_013_Math_no_correct_end ('check', substr( $text, $pos_start, 50) );
#print 'Math:'.substr( $text, $pos_start, 50)."\n";
$end_search = 'yes';
}
-
+
}
- until ( $end_search eq 'yes') ;
+ until ( $end_search eq 'yes') ;
}
sub get_source {
@@ -3171,17 +3171,17 @@ sub get_source {
$end_search = 'yes';
#get position of next