Skip to content

Commit

Permalink
Merge pull request #562 from jaytaph/css3-location-fix
Browse files Browse the repository at this point in the history
Added improvements on the CSS3 user stylesheet parser
  • Loading branch information
jaytaph authored Aug 27, 2024
2 parents cfb06ca + 9428af1 commit a74b431
Show file tree
Hide file tree
Showing 66 changed files with 2,709 additions and 1,153 deletions.
17 changes: 15 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ name = "html5-parser"
name = "tree_iterator"
harness = false

[[bench]]
name = "bytestream"
harness = false

[dependencies]
gosub_shared = { path = "./crates/gosub_shared", features = [] }
gosub_config = { path = "./crates/gosub_config", features = [] }
Expand Down
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ SHELL=/usr/bin/env bash -O globstar

all: help

test: test_commands test_unit test_clippy test_fmt ## Runs tests
test: test_unit test_clippy test_fmt test_commands ## Runs tests

bench: ## Benchmark the project
cargo bench
Expand All @@ -14,14 +14,16 @@ build: ## Build the project
section "Cargo build" ;\
cargo build --all

format: ## Fix formatting and clippy errors
fix-format: ## Fix formatting and clippy errors
cargo fmt --all
cargo clippy --all --fix --allow-dirty --allow-staged

check-format: test_clippy test_fmt ## Check the project for clippy and formatting errors

test_unit:
source test-utils.sh ;\
section "Cargo test" ;\
cargo test --verbose --all --no-fail-fast --all-features --all-targets
cargo test --all --no-fail-fast --all-features --all-targets

test_clippy:
source test-utils.sh ;\
Expand Down
27 changes: 27 additions & 0 deletions benches/bytestream.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use std::fs::File;

use criterion::{criterion_group, criterion_main, Criterion};
use gosub_shared::byte_stream::{ByteStream, Encoding, Stream};

fn utf8_testfile(c: &mut Criterion) {
let mut group = c.benchmark_group("Bytestream test");
group.significance_level(0.1).sample_size(500);

let html_file = File::open("tests/data/bytestream/utf8.txt").unwrap();
let mut stream = ByteStream::new(Encoding::UTF8, None);
let _ = stream.read_from_file(html_file);
stream.close();

group.bench_function("utf8 test file", |b| {
b.iter(|| {
while !stream.eof() {
stream.read_and_next();
}
})
});

group.finish();
}

criterion_group!(benches, utf8_testfile);
criterion_main!(benches);
12 changes: 5 additions & 7 deletions benches/tree_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
use gosub_html5::node::NodeId;
use gosub_html5::parser::document::{Document, DocumentBuilder, TreeIterator};
use gosub_html5::parser::Html5Parser;
use gosub_shared::byte_stream::ByteStream;
use gosub_shared::byte_stream::{ByteStream, Encoding};

fn wikipedia_main_page(c: &mut Criterion) {
// Criterion can report inconsistent results from run to run in some cases. We attempt to
Expand All @@ -14,9 +14,8 @@ fn wikipedia_main_page(c: &mut Criterion) {
group.significance_level(0.1).sample_size(500);

let html_file = File::open("tests/data/tree_iterator/wikipedia_main.html").unwrap();
let mut stream = ByteStream::new();
let _ = stream.read_from_file(html_file, Some(gosub_shared::byte_stream::Encoding::UTF8));
stream.set_confidence(gosub_shared::byte_stream::Confidence::Certain);
let mut stream = ByteStream::new(Encoding::UTF8, None);
let _ = stream.read_from_file(html_file);

let main_document = DocumentBuilder::new_document(None);
let document = Document::clone(&main_document);
Expand All @@ -41,9 +40,8 @@ fn stackoverflow_home(c: &mut Criterion) {

// using the home page of Stack Overflow as a rough estimate of traversing a decently sized website
let html_file = File::open("tests/data/tree_iterator/stackoverflow.html").unwrap();
let mut bytestream = ByteStream::new();
let _ = bytestream.read_from_file(html_file, Some(gosub_shared::byte_stream::Encoding::UTF8));
bytestream.set_confidence(gosub_shared::byte_stream::Confidence::Certain);
let mut bytestream = ByteStream::new(Encoding::UTF8, None);
let _ = bytestream.read_from_file(html_file);

let main_document = DocumentBuilder::new_document(None);
let document = Document::clone(&main_document);
Expand Down
5 changes: 2 additions & 3 deletions crates/gosub_bindings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub mod wrapper;
use gosub_html5::parser::document::{Document, DocumentBuilder};
use gosub_html5::parser::Html5Parser;
use gosub_rendering::render_tree::{Node, NodeType, RenderTree, TreeIterator};
use gosub_shared::byte_stream::{ByteStream, Confidence, Encoding};
use gosub_shared::byte_stream::{ByteStream, Encoding};
use wrapper::node::CNode;

/// Initialize a render tree and return an owning pointer to it.
Expand All @@ -30,9 +30,8 @@ pub unsafe extern "C" fn gosub_rendertree_init(html: *const c_char) -> *mut Rend
return ptr::null_mut();
}
};
let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str(html_str, Some(Encoding::UTF8));
stream.set_confidence(Confidence::Certain);
stream.close();

let doc = DocumentBuilder::new_document(None);
Expand Down
19 changes: 14 additions & 5 deletions crates/gosub_css3/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::node::Node;
use crate::node::{Node, NodeType};
use crate::parser_config::{Context, ParserConfig};
use crate::tokenizer::Tokenizer;
use gosub_shared::byte_stream::{ByteStream, Encoding, Location};
Expand Down Expand Up @@ -46,16 +46,25 @@ impl<'stream> Css3<'stream> {
pub fn parse(data: &str, config: ParserConfig) -> Result<Node, Error> {
let t_id = timing_start!("css3.parse", config.source.as_deref().unwrap_or(""));

let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str(data, Some(Encoding::UTF8));
stream.close();

let mut parser = Css3::new(&mut stream);
let ret = parser.parse_internal(config);
let result = parser.parse_internal(config);

timing_stop!(t_id);

ret
match result {
Ok(Some(node)) => Ok(node),
Ok(None) => Ok(Node::new(
NodeType::StyleSheet {
children: Vec::new(),
},
Location::default(),
)),
Err(e) => Err(e),
}
}

/// Create a new parser with the given bytestream
Expand All @@ -68,7 +77,7 @@ impl<'stream> Css3<'stream> {
}

/// Actual parser implementation
fn parse_internal(&mut self, config: ParserConfig) -> Result<Node, Error> {
fn parse_internal(&mut self, config: ParserConfig) -> Result<Option<Node>, Error> {
self.config = config;

match self.config.context {
Expand Down
7 changes: 7 additions & 0 deletions crates/gosub_css3/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ pub enum NodeType {
Container {
children: Vec<Node>,
},
Range {
left: Node,
left_comparison: Node,
middle: Node,
right_comparison: Option<Node>,
right: Option<Node>,
},
}

/// A node is a single element in the AST
Expand Down
31 changes: 21 additions & 10 deletions crates/gosub_css3/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ impl Css3<'_> {
if t.token_type != token_type {
return Err(Error::new(
format!("Expected {:?}, got {:?}", token_type, t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}

Expand All @@ -44,7 +44,7 @@ impl Css3<'_> {
TokenType::Function(name) => Ok(name),
_ => Err(Error::new(
format!("Expected function, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -55,7 +55,7 @@ impl Css3<'_> {
TokenType::Number(value) => Ok(value),
_ => Err(Error::new(
format!("Expected number, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -66,7 +66,7 @@ impl Css3<'_> {
TokenType::Delim(c) => Ok(c),
_ => Err(Error::new(
format!("Expected delimiter, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -77,7 +77,7 @@ impl Css3<'_> {
TokenType::QuotedString(s) => Ok(s),
_ => Err(Error::new(
format!("Expected string, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -88,7 +88,7 @@ impl Css3<'_> {
TokenType::Delim(c) if c == delimiter => Ok(c),
_ => Err(Error::new(
format!("Expected delimiter '{}', got {:?}", delimiter, t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -97,7 +97,7 @@ impl Css3<'_> {
loop {
let t = self.tokenizer.consume();
match t.token_type {
TokenType::Whitespace | TokenType::Comment(_) => {
TokenType::Whitespace(_) | TokenType::Comment(_) => {
// just eat it
}
_ => {
Expand All @@ -114,7 +114,7 @@ impl Css3<'_> {
TokenType::Ident(s) if s.eq_ignore_ascii_case(ident) => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -125,18 +125,29 @@ impl Css3<'_> {
TokenType::Ident(s) if s == ident => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}

pub fn consume_any_ident(&mut self) -> Result<String, Error> {
let t = self.tokenizer.consume();

match t.token_type {
TokenType::Delim('.') => {
let t = self.tokenizer.consume();
match t.token_type {
TokenType::Ident(s) => Ok(format!(".{}", s)),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location(),
)),
}
}
TokenType::Ident(s) => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand Down
14 changes: 7 additions & 7 deletions crates/gosub_css3/src/parser/anplusb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ impl Css3<'_> {
if unit.chars().nth(0).unwrap().to_lowercase().to_string() != "n" {
return Err(Error::new(
format!("Expected n, found {}", unit).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
Ok(if unit.len() == 1 {
Expand Down Expand Up @@ -43,7 +43,7 @@ impl Css3<'_> {
if !allow_sign {
return Err(Error::new(
format!("Unexpected sign {}", sign).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
pos += 1;
Expand All @@ -68,7 +68,7 @@ impl Css3<'_> {
if nval != c {
return Err(Error::new(
format!("Expected {}", c).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}

Expand Down Expand Up @@ -113,7 +113,7 @@ impl Css3<'_> {
self.tokenizer.lookahead(0).token_type
)
.to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
};
Expand Down Expand Up @@ -199,7 +199,7 @@ impl Css3<'_> {
pub fn parse_anplusb(&mut self) -> Result<Node, Error> {
log::trace!("parse_anplusb");

let loc = self.tokenizer.current_location().clone();
let loc = self.tokenizer.current_location();

let mut a = String::from("1");
let mut b;
Expand Down Expand Up @@ -230,7 +230,7 @@ impl Css3<'_> {
self.tokenizer.reconsume();
return Err(Error::new(
"Expected anplusb".to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
}
Expand All @@ -254,7 +254,7 @@ mod test {

macro_rules! test {
($func:ident, $input:expr, $expected:expr) => {
let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str($input, Some(Encoding::UTF8));
stream.close();

Expand Down
Loading

0 comments on commit a74b431

Please sign in to comment.