diff --git a/.dockerignore b/.dockerignore index 216553b..d6e4d59 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,7 +1,6 @@ * !entities/out !quotes/out -!*.pl !build.sh !lint.sh !Cargo.lock diff --git a/.gitattributes b/.gitattributes index 38251b8..6313b56 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,8 +1 @@ -*.conf text eol=lf -*.inc text eol=lf -*.json text eol=lf -*.md text eol=lf -*.pl text eol=lf -*.py text eol=lf -*.sh text eol=lf -*.yml text eol=lf +* text=auto eol=lf diff --git a/.pre-process-annotate-attributes.pl b/.pre-process-annotate-attributes.pl deleted file mode 100644 index d22d455..0000000 --- a/.pre-process-annotate-attributes.pl +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use v5.10.0; - -my $state = undef; - -my $mode = 'bored'; -my %attributes = (); -my @lines = (); -my %instances = (); -while (defined($_ = <>)) { - my $line = \"$_"; - push(@lines, $line); - if ($_ eq "

Attributes

\n") { - $mode = 'index'; - } else { - if ($mode eq 'bored') { - if ($_ eq "
Content attributes:
\n") { - $mode = 'element'; - } - } elsif ($mode eq 'element') { - if ($_ eq "
Global attributes
\n") { - # ignore - } elsif ($_ =~ m!^
.*.+(.*)
\n$!os) { - my $key = $1; - my $notes = $2; - my $special = ''; - $special = 'global' if $notes =~ m!special semantics!os; - $special = 'alt' if $notes =~ m@@os; - if (not exists $instances{$key}) { - $instances{$key} = []; - } - if ($notes !~ m@@os) { - push(@{$instances{$key}}, { line => $line, special => $special }); - } - } elsif ($_ =~ m/^ \n$/os) { - # ignore - } elsif ($_ eq "
Any other attribute that has no namespace (see prose).
\n") { - # ignore - } elsif ($_ =~ m!^
!o) { - $mode = 'bored'; - } else { - # ignore - } - } elsif ($mode eq 'index') { - if ($_ eq " \n") { - $mode = 'end'; - } elsif ($_ eq " \n") { - $mode = 'tr'; - } else { - # ignore... - } - } elsif ($mode eq 'tr') { - if ($_ =~ m!^ <(?:code|span) data-x="([^"]+)">[^<]*(?: \(in [^\)]+\))?;?\n$!os) { - $attributes{$1} = 1; - $mode = 'index-in'; - } else { - # ignore... - } - } elsif ($mode eq 'index-in') { - if ($_ =~ m!^ <(?:code|span) data-x="([^"]+)">[^<]*(?: \(in [^\)]+\))?;?\n$!os) { - $attributes{$1} = 1; - } elsif ($_ =~ m@^ (.+?)(?:)?\n$@os) { - local $" = ', '; - my $description = $1; - my $altdescription = $2; - foreach my $key (keys %attributes) { - foreach my $entry (@{$instances{$key}}) { - my $line = $entry->{line}; - if ($entry->{special} eq 'global') { - $$line =~ s!(\.\n)$!: $description<\!--SPECIAL-->$1!os; - $$line =~ s!<\!--SPECIAL-->: *([^ ])!; \l$1!os; - } elsif ($entry->{special} eq 'alt') { - die "$key wants alt description but we have none" unless defined $altdescription; - $$line =~ s!(\n)$! — $altdescription$1!os; - } else { - $$line =~ s!(\n)$! — $description$1!os; - } - } - } - %attributes = (); - $mode = 'index'; - } else { - die "$.: unexpected line in index: $_"; - } - } else { - # ignore - } - } -} - -foreach (@lines) { - $$_ =~ s///gos; - print $$_; -} diff --git a/.pre-process-index-generator.pl b/.pre-process-index-generator.pl deleted file mode 100644 index d495d8d..0000000 --- a/.pre-process-index-generator.pl +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $seenInsertionPoint = 0; -my $after = ''; - -my %definitions; -my $inpre = 0; -while (<>) { - $inpre = 1 if /
]*)( id="?([^ ">]*)"?)?[^>]*>([^<:]*)?<\/(span|dfn|a)>/os) {
-        my $partial = $1;
-        my $id;
-        my $name;
-        if ($partial) {
-            if ($4) {
-                ($id, $name) = ($4, $5);
-            } else {
-                die "partial interface entry for $5 is missing an id (required for interface index)";
-            }
-            $definitions{$name} = { } unless defined $definitions{$name};
-            $definitions{$name}{partial} = [] unless exists $definitions{$name}{partial};
-            push @{$definitions{$name}{partial}}, $id;
-        } else {
-            $name = $5;
-            $definitions{$name} = { } unless defined $definitions{$name};
-            die "duplicate interface definitions for $name" if exists $definitions{$name}{primary};
-        }
-    }
-    $inpre = 0 if /<\/pre>/os;
-    if (/^INSERT INTERFACES HERE\n?$/os) {
-        $seenInsertionPoint = 1;
-    } else {
-        if ($seenInsertionPoint) {
-            $after .= $_;
-        } else {
-            print $_;
-        }
-    }
-}
-
-die unless $seenInsertionPoint;
-
-print "  \n";
-print $after;
diff --git a/.pre-process-main.pl b/.pre-process-main.pl
deleted file mode 100644
index 023ffce..0000000
--- a/.pre-process-main.pl
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/perl -w
-# -d:DProf
-use strict;
-use File::Path;
-use File::Basename;
-use Time::HiRes qw(time);
-
-$| = 1;
-
-my $verbose = $ARGV[0] && '--verbose' eq "$ARGV[0]";
-sub report($) { print STDERR $_[0] if $verbose; }
-
-report "Loading...";
-my @lines = ;
-
-report "\rPreprocessing...";
-
-# monitoring
-my $lineNumber = 0;
-my $lastProgress = -1;
-my $maxProgress = 20;
-my $start = time;
-
-# actual work
-my $currentRepresents = '';
-my $represents = {};
-
-while (@lines) {
-    my $line = shift @lines;
-    $lineNumber += 1;
-    my $progress = int($maxProgress * $lineNumber / (scalar @lines + $lineNumber));
-    if ($progress != $lastProgress) {
-        my $rate = 0;
-        my $elapsed = (time - $start);
-        if ($elapsed > 0) {
-            $rate = $lineNumber / $elapsed;
-        }
-        report sprintf "\rParsing... [" . ('#' x $progress) . (' ' x ($maxProgress - $progress)) . "] %5d lines per second", $rate;
-        $lastProgress = $progress;
-    }
-
-    if ($line =~ m|^(.*)(.*)\n$|os) {
-        unshift @lines, split("\n", $1 . `cat $ENV{'HTML_CACHE'}/$2` . $3);
-        next;
-    } elsif ($line =~ m!^( *)(]*>(?:]*>)?)EXAMPLE (offline/|workers/|canvas/)((?:[-a-z0-9]+/){1,2}[-a-z0-9]+.[-a-z0-9]+)((?:)?
) *\n$!os) { - my $indent = $1; - my $starttags = $2; - my $folder = $3; - my $example = $4; - my $endtags = $5; - - my $data; - my $fh; - - open($fh, "<:encoding(UTF-8)", "$ENV{'HTML_SOURCE'}/demos/$folder$example") - or die "\rCannot open $ENV{'HTML_SOURCE'}/demos/$folder$example"; - while (<$fh>) { - $data .= $_; - } - close $fh; - - $data =~ s/&/&/gos; - $data =~ s/The ([^<]+) element represents (.*)

*\n$|os) { - $represents->{$1} = "\u$2"; - } elsif ($line =~ m|^ *

The ([^<]+) element represents ?(.*)\n$|os) { - $currentRepresents = $1; - $represents->{$currentRepresents} = "\u$2"; - } elsif ($currentRepresents) { - if ($line =~ m|^ *(.*)

*\n$|os) { - $represents->{$currentRepresents} .= " $1"; - $currentRepresents = ''; - } elsif ($line =~ m|^ *(.+?) *\n$|os) { - $represents->{$currentRepresents} .= " $1"; - } else { - die "missed end of <$currentRepresents> intro.\n"; - } - } - $line =~ s|| if (exists $represents->{$1}) { $represents->{$1} } else { die "\nUnknown element <$1> used in REPRESENTS pragma.\n" }|gose; - - # This seems to be necessary due to the file substitutions, for some reason. - $line = normalizeNewlines($line); - - print "$line"; -} -report "\n"; - -sub normalizeNewlines { - $_ = shift; - chomp; - return "$_\n"; -} diff --git a/.pre-process-tag-omission.pl b/.pre-process-tag-omission.pl deleted file mode 100644 index 691f852..0000000 --- a/.pre-process-tag-omission.pl +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -my $state = undef; - -my $mode = 'bored'; -my $current = ''; -my %voids = (); -my %insertionPoints = (); -my @lines = (); - -sub pushLine { - my($element, $text) = @_; - die unless exists $insertionPoints{$element}; - my $line = $insertionPoints{$element}; - if ($$line eq '') { - $$line .= "
Tag omission in text/html:
\n"; - } - $text =~ s!<(/?)p>!<${1}dd>!g; - $text =~ s!may!can!g; - $$line .= " $text"; -} - -while (defined($_ = <>)) { - if ($mode eq 'bored') { - if ($_ eq "
Optional tags
\n") { - $mode = 'optionals'; - } elsif ($_ eq "
Void elements
\n") { - $mode = 'voids'; - } elsif ($_ =~ m!([^<]+) elements?! - || $_ =~ m!id="the-[^-]+-element"[^>]*>The ]*>([^<]+)!) { - $current = $1; - $mode = 'element'; - } - } elsif ($mode eq 'element') { - if ($_ eq "
Content attributes:
\n") { - my $line = ''; - push(@lines, \$line); - $insertionPoints{$current} = \$line; - $mode = 'bored'; - } else { - # ignore... - } - } elsif ($mode eq 'voids') { - if ($_ =~ m!([^<]+)!g) { - $voids{$1} = 1; - } - } - } elsif ($mode eq 'optionals') { - if ($_ =~ m!

An? ([^<]+)!) { - $current = $1; - pushLine($current, $_); - $mode = 'optionals-in'; - } elsif ($_ =~ m!!) { - $mode = 'optionals'; - } - pushLine($current, $_); - } elsif ($mode eq 'done') { - # ignore... - } else { - die 'unknown mode'; - } - my $line = "$_"; - push(@lines, \$line); -} - -foreach (keys %insertionPoints) { - my $line = $insertionPoints{$_}; - if ($$line eq '') { - if ($voids{$_}) { - pushLine($_, "

No end tag.

\n"); - } else { - pushLine($_, "

Neither tag is omissible.

\n"); - } - } -} - -foreach (@lines) { - print $$_; -} diff --git a/Dockerfile b/Dockerfile index 5dd646b..773335e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,5 +20,4 @@ RUN pipx install bs-highlighter COPY . /whatwg/html-build/ ENV SKIP_BUILD_UPDATE_CHECK true -ENV PROCESS_WITH_RUST true ENTRYPOINT ["bash", "/whatwg/html-build/build.sh"] diff --git a/README.md b/README.md index ead701b..7c620ab 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Local builds will be fastest, but require installing a lot of prerequisites. Usi To build locally, you'll need the following commands installed on your system: -- `curl`, `grep`, `perl`, `unzip` +- `curl`, `grep`, `perl`, `unzip`, `cargo` Optionally, for faster builds, you can install [Wattsi](https://github.com/whatwg/wattsi). If you don't bother with that, we will use the [build server](https://github.com/whatwg/build.whatwg.org), which requires an internet connection. diff --git a/build.sh b/build.sh index 6b2eccf..366bb29 100755 --- a/build.sh +++ b/build.sh @@ -33,7 +33,6 @@ HTML_CACHE=${HTML_CACHE:-$DIR/.cache} HTML_TEMP=${HTML_TEMP:-$DIR/.temp} HTML_OUTPUT=${HTML_OUTPUT:-$DIR/output} HTML_GIT_CLONE_OPTIONS=${HTML_GIT_CLONE_OPTIONS:-"--depth=2"} -PROCESS_WITH_RUST=${PROCESS_WITH_RUST:-false} # These are used by child scripts, and so we export them export HTML_CACHE @@ -640,23 +639,13 @@ function processSource { BUILD_TYPE="$2" cp -p entities/out/entities.inc "$HTML_CACHE" cp -p entities/out/entities-dtd.url "$HTML_CACHE" - if [[ $PROCESS_WITH_RUST == "true" ]]; then - if hash html-build 2>/dev/null; then - html-build <"$HTML_SOURCE/$SOURCE_LOCATION" >"$HTML_TEMP/source-whatwg-complete" - else - CARGO_ARGS=( --release ) - $VERBOSE && CARGO_ARGS+=( --verbose ) - $QUIET && CARGO_ARGS+=( --quiet ) - cargo run "${CARGO_ARGS[@]}" <"$HTML_SOURCE/$SOURCE_LOCATION" >"$HTML_TEMP/source-whatwg-complete" - fi + if hash html-build 2>/dev/null; then + html-build <"$HTML_SOURCE/$SOURCE_LOCATION" >"$HTML_TEMP/source-whatwg-complete" else - if $VERBOSE; then - perl .pre-process-main.pl --verbose < "$HTML_SOURCE/$SOURCE_LOCATION" > "$HTML_TEMP/source-expanded-1" - else - perl .pre-process-main.pl < "$HTML_SOURCE/$SOURCE_LOCATION" > "$HTML_TEMP/source-expanded-1" - fi - perl .pre-process-annotate-attributes.pl < "$HTML_TEMP/source-expanded-1" > "$HTML_TEMP/source-expanded-2" # this one could be merged - perl .pre-process-tag-omission.pl < "$HTML_TEMP/source-expanded-2" | perl .pre-process-index-generator.pl > "$HTML_TEMP/source-whatwg-complete" # this one could be merged + CARGO_ARGS=( --release ) + $VERBOSE && CARGO_ARGS+=( --verbose ) + $QUIET && CARGO_ARGS+=( --quiet ) + cargo run "${CARGO_ARGS[@]}" <"$HTML_SOURCE/$SOURCE_LOCATION" >"$HTML_TEMP/source-whatwg-complete" fi runWattsi "$HTML_TEMP/source-whatwg-complete" "$HTML_TEMP/wattsi-output" "$HIGHLIGHT_SERVER_URL" diff --git a/ci-build/docker-build.sh b/ci-build/docker-build.sh index c01403c..957b94b 100755 --- a/ci-build/docker-build.sh +++ b/ci-build/docker-build.sh @@ -13,12 +13,11 @@ function main { # We want the image to contain: # * All of the important stuff from the top-level (html-build) directory # * But, the Dockerfile from this (ci-build) directory - # And in particular it should *not* contain the top-level Dockerfile, non-.pl dotfiles, .git/, and + # And in particular it should *not* contain the top-level Dockerfile, dotfiles, .git/, and # any html/ and output/ directories that might be hanging around from local testing. cp "$here/Dockerfile" "$TMP_DIR" cd "$here/.." cp -r !(.*|html|output|Dockerfile) "$TMP_DIR" - cp .*.pl "$TMP_DIR" cd "$TMP_DIR" trap cleanTemp EXIT diff --git a/ci-build/inside-container.sh b/ci-build/inside-container.sh index 292da8b..efdd85e 100644 --- a/ci-build/inside-container.sh +++ b/ci-build/inside-container.sh @@ -6,7 +6,7 @@ cd "$(dirname "$0")/../.." PDF_SERVE_PORT=8080 -PROCESS_WITH_RUST=true SKIP_BUILD_UPDATE_CHECK=true ./html-build/build.sh +SKIP_BUILD_UPDATE_CHECK=true ./html-build/build.sh echo "" echo "Running conformance checker..."