Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added improvements on the CSS3 user stylesheet parser #562

Merged
merged 12 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ name = "html5-parser"
name = "tree_iterator"
harness = false

[[bench]]
name = "bytestream"
harness = false

[dependencies]
gosub_shared = { path = "./crates/gosub_shared", features = [] }
gosub_config = { path = "./crates/gosub_config", features = [] }
Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ SHELL=/usr/bin/env bash -O globstar

all: help

test: test_commands test_unit test_clippy test_fmt ## Runs tests
test: test_unit test_clippy test_fmt test_commands ## Runs tests

bench: ## Benchmark the project
cargo bench
Expand All @@ -14,10 +14,12 @@ build: ## Build the project
section "Cargo build" ;\
cargo build --all

format: ## Fix formatting and clippy errors
fix-format: ## Fix formatting and clippy errors
cargo fmt --all
cargo clippy --all --fix --allow-dirty --allow-staged

check-format: test_clippy test_fmt ## Check the project for clippy and formatting errors

test_unit:
source test-utils.sh ;\
section "Cargo test" ;\
Expand Down
27 changes: 27 additions & 0 deletions benches/bytestream.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use std::fs::File;

use criterion::{criterion_group, criterion_main, Criterion};
use gosub_shared::byte_stream::{ByteStream, Encoding, Stream};

fn utf8_testfile(c: &mut Criterion) {
let mut group = c.benchmark_group("Bytestream test");
group.significance_level(0.1).sample_size(500);

let html_file = File::open("tests/data/bytestream/utf8.txt").unwrap();
let mut stream = ByteStream::new(Encoding::UTF8, None);
let _ = stream.read_from_file(html_file);
stream.close();

group.bench_function("utf8 test file", |b| {
b.iter(|| {
while !stream.eof() {
stream.read_and_next();
}
})
});

group.finish();
}

criterion_group!(benches, utf8_testfile);
criterion_main!(benches);
12 changes: 5 additions & 7 deletions benches/tree_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
use gosub_html5::node::NodeId;
use gosub_html5::parser::document::{Document, DocumentBuilder, TreeIterator};
use gosub_html5::parser::Html5Parser;
use gosub_shared::byte_stream::ByteStream;
use gosub_shared::byte_stream::{ByteStream, Encoding};

fn wikipedia_main_page(c: &mut Criterion) {
// Criterion can report inconsistent results from run to run in some cases. We attempt to
Expand All @@ -14,9 +14,8 @@ fn wikipedia_main_page(c: &mut Criterion) {
group.significance_level(0.1).sample_size(500);

let html_file = File::open("tests/data/tree_iterator/wikipedia_main.html").unwrap();
let mut stream = ByteStream::new();
let _ = stream.read_from_file(html_file, Some(gosub_shared::byte_stream::Encoding::UTF8));
stream.set_confidence(gosub_shared::byte_stream::Confidence::Certain);
let mut stream = ByteStream::new(Encoding::UTF8, None);
let _ = stream.read_from_file(html_file);

let main_document = DocumentBuilder::new_document(None);
let document = Document::clone(&main_document);
Expand All @@ -41,9 +40,8 @@ fn stackoverflow_home(c: &mut Criterion) {

// using the Stack Overflow home page as a rough estimate of traversing a decently sized website
let html_file = File::open("tests/data/tree_iterator/stackoverflow.html").unwrap();
let mut bytestream = ByteStream::new();
let _ = bytestream.read_from_file(html_file, Some(gosub_shared::byte_stream::Encoding::UTF8));
bytestream.set_confidence(gosub_shared::byte_stream::Confidence::Certain);
let mut bytestream = ByteStream::new(Encoding::UTF8, None);
let _ = bytestream.read_from_file(html_file);

let main_document = DocumentBuilder::new_document(None);
let document = Document::clone(&main_document);
Expand Down
5 changes: 2 additions & 3 deletions crates/gosub_bindings/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub mod wrapper;
use gosub_html5::parser::document::{Document, DocumentBuilder};
use gosub_html5::parser::Html5Parser;
use gosub_rendering::render_tree::{Node, NodeType, RenderTree, TreeIterator};
use gosub_shared::byte_stream::{ByteStream, Confidence, Encoding};
use gosub_shared::byte_stream::{ByteStream, Encoding};
use wrapper::node::CNode;

/// Initialize a render tree and return an owning pointer to it.
Expand All @@ -30,9 +30,8 @@ pub unsafe extern "C" fn gosub_rendertree_init(html: *const c_char) -> *mut Rend
return ptr::null_mut();
}
};
let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str(html_str, Some(Encoding::UTF8));
stream.set_confidence(Confidence::Certain);
stream.close();

let doc = DocumentBuilder::new_document(None);
Expand Down
19 changes: 14 additions & 5 deletions crates/gosub_css3/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::node::Node;
use crate::node::{Node, NodeType};
use crate::parser_config::{Context, ParserConfig};
use crate::tokenizer::Tokenizer;
use gosub_shared::byte_stream::{ByteStream, Encoding, Location};
Expand Down Expand Up @@ -46,16 +46,25 @@ impl<'stream> Css3<'stream> {
pub fn parse(data: &str, config: ParserConfig) -> Result<Node, Error> {
let t_id = timing_start!("css3.parse", config.source.as_deref().unwrap_or(""));

let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str(data, Some(Encoding::UTF8));
stream.close();

let mut parser = Css3::new(&mut stream);
let ret = parser.parse_internal(config);
let result = parser.parse_internal(config);

timing_stop!(t_id);

ret
match result {
Ok(Some(node)) => Ok(node),
Ok(None) => Ok(Node::new(
NodeType::StyleSheet {
children: Vec::new(),
},
Location::default(),
)),
Err(e) => Err(e),
}
}

/// Create a new parser with the given bytestream
Expand All @@ -68,7 +77,7 @@ impl<'stream> Css3<'stream> {
}

/// Actual parser implementation
fn parse_internal(&mut self, config: ParserConfig) -> Result<Node, Error> {
fn parse_internal(&mut self, config: ParserConfig) -> Result<Option<Node>, Error> {
self.config = config;

match self.config.context {
Expand Down
7 changes: 7 additions & 0 deletions crates/gosub_css3/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ pub enum NodeType {
Container {
children: Vec<Node>,
},
Range {
left: Node,
left_comparison: Node,
middle: Node,
right_comparison: Option<Node>,
right: Option<Node>,
},
}

/// A node is a single element in the AST
Expand Down
31 changes: 21 additions & 10 deletions crates/gosub_css3/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ impl Css3<'_> {
if t.token_type != token_type {
return Err(Error::new(
format!("Expected {:?}, got {:?}", token_type, t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}

Expand All @@ -44,7 +44,7 @@ impl Css3<'_> {
TokenType::Function(name) => Ok(name),
_ => Err(Error::new(
format!("Expected function, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -55,7 +55,7 @@ impl Css3<'_> {
TokenType::Number(value) => Ok(value),
_ => Err(Error::new(
format!("Expected number, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -66,7 +66,7 @@ impl Css3<'_> {
TokenType::Delim(c) => Ok(c),
_ => Err(Error::new(
format!("Expected delimiter, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -77,7 +77,7 @@ impl Css3<'_> {
TokenType::QuotedString(s) => Ok(s),
_ => Err(Error::new(
format!("Expected string, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -88,7 +88,7 @@ impl Css3<'_> {
TokenType::Delim(c) if c == delimiter => Ok(c),
_ => Err(Error::new(
format!("Expected delimiter '{}', got {:?}", delimiter, t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -97,7 +97,7 @@ impl Css3<'_> {
loop {
let t = self.tokenizer.consume();
match t.token_type {
TokenType::Whitespace | TokenType::Comment(_) => {
TokenType::Whitespace(_) | TokenType::Comment(_) => {
// just eat it
}
_ => {
Expand All @@ -114,7 +114,7 @@ impl Css3<'_> {
TokenType::Ident(s) if s.eq_ignore_ascii_case(ident) => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand All @@ -125,18 +125,29 @@ impl Css3<'_> {
TokenType::Ident(s) if s == ident => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}

pub fn consume_any_ident(&mut self) -> Result<String, Error> {
let t = self.tokenizer.consume();

match t.token_type {
TokenType::Delim('.') => {
let t = self.tokenizer.consume();
match t.token_type {
TokenType::Ident(s) => Ok(format!(".{}", s)),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location(),
)),
}
}
TokenType::Ident(s) => Ok(s),
_ => Err(Error::new(
format!("Expected ident, got {:?}", t),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
)),
}
}
Expand Down
14 changes: 7 additions & 7 deletions crates/gosub_css3/src/parser/anplusb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ impl Css3<'_> {
if unit.chars().nth(0).unwrap().to_lowercase().to_string() != "n" {
return Err(Error::new(
format!("Expected n, found {}", unit).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
Ok(if unit.len() == 1 {
Expand Down Expand Up @@ -43,7 +43,7 @@ impl Css3<'_> {
if !allow_sign {
return Err(Error::new(
format!("Unexpected sign {}", sign).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
pos += 1;
Expand All @@ -68,7 +68,7 @@ impl Css3<'_> {
if nval != c {
return Err(Error::new(
format!("Expected {}", c).to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}

Expand Down Expand Up @@ -113,7 +113,7 @@ impl Css3<'_> {
self.tokenizer.lookahead(0).token_type
)
.to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
};
Expand Down Expand Up @@ -199,7 +199,7 @@ impl Css3<'_> {
pub fn parse_anplusb(&mut self) -> Result<Node, Error> {
log::trace!("parse_anplusb");

let loc = self.tokenizer.current_location().clone();
let loc = self.tokenizer.current_location();

let mut a = String::from("1");
let mut b;
Expand Down Expand Up @@ -230,7 +230,7 @@ impl Css3<'_> {
self.tokenizer.reconsume();
return Err(Error::new(
"Expected anplusb".to_string(),
self.tokenizer.current_location().clone(),
self.tokenizer.current_location(),
));
}
}
Expand All @@ -254,7 +254,7 @@ mod test {

macro_rules! test {
($func:ident, $input:expr, $expected:expr) => {
let mut stream = ByteStream::new();
let mut stream = ByteStream::new(Encoding::UTF8, None);
stream.read_from_str($input, Some(Encoding::UTF8));
stream.close();

Expand Down
Loading
Loading