diff --git a/.gitignore b/.gitignore
index ea8c4bf7f..39518e663 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+html5lib-tests
diff --git a/Cargo.lock b/Cargo.lock
index 4b3524145..585d079f7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "aho-corasick"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "convert_case"
 version = "0.4.0"
@@ -22,13 +31,36 @@ dependencies = [
 ]
 
 [[package]]
-name = "gosub-browser"
+name = "gosub-engine"
 version = "0.1.0"
 dependencies = [
  "derive_more",
+ "lazy_static",
  "phf",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
 ]
 
+[[package]]
+name = "itoa"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
 [[package]]
 name = "phf"
 version = "0.11.2"
@@ -104,6 +136,35 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 
+[[package]]
+name = "regex"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -113,12 +174,46 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "ryu"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
+
 [[package]]
 name = "semver"
 version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
 
+[[package]]
+name = "serde"
+version = "1.0.183"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c"
+
+[[package]]
+name = "serde_derive"
+version = "1.0.183"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.28",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.105"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360"
+dependencies = [
+ "itoa",
+ "ryu",
+ "serde",
+]
+
 [[package]]
 name = "siphasher"
 version = "0.3.10"
diff --git a/Cargo.toml b/Cargo.toml
index e008cfe9f..8caf39846 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,10 +1,20 @@
 [package]
-name = "gosub-browser"
+name = "gosub-engine"
 version = "0.1.0"
 edition = "2021"
+authors = ["Joshua Thijssen <jaytaph@deadcode.nl>"]
 
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+description = "html5 browser engine"
+license = "MIT"
+repository = "https://github.com/jaytaph/gosub-browser"
+readme = "README.MD"
+keywords = ["html5", "parser"]
 
 [dependencies]
 phf = { version = "0.11.2", features = ["macros"] }
 derive_more = "0.99"
+serde = "1.0"
+serde_json = "1.0"
+serde_derive = "1.0"
+regex = "1"
+lazy_static = "1.4"
\ No newline at end of file
diff --git a/src/main.rs b/src/bin/gosub-browser.rs
similarity index 74%
rename from src/main.rs
rename to src/bin/gosub-browser.rs
index 807d33f74..0f93e43ca 100644
--- a/src/main.rs
+++ b/src/bin/gosub-browser.rs
@@ -1,14 +1,11 @@
 use std::fs::File;
 
-#[allow(dead_code)]
-mod html5_parser;
-
-use html5_parser::input_stream::Confidence;
-use html5_parser::input_stream::{Encoding, InputStream};
-use html5_parser::Html5Parser;
+use gosub_engine::html5_parser::input_stream::Confidence;
+use gosub_engine::html5_parser::input_stream::{Encoding, InputStream};
+use gosub_engine::html5_parser::parser::Html5Parser;
 
 fn main() {
-    let file = File::open("hello.html").expect("could not open file");
+    let file = File::open("../../hello.html").expect("could not open file");
 
     // We just read the stream from a file. It will use UTF8 as the default encoding.
     let mut stream = InputStream::new();
diff --git a/src/bin/html5test.rs b/src/bin/html5test.rs
new file mode 100755
index 000000000..2767ced64
--- /dev/null
+++ b/src/bin/html5test.rs
@@ -0,0 +1,422 @@
+use std::{env, fs, io};
+use std::collections::HashSet;
+
+use serde_json::Value;
+use gosub_engine::html5_parser::input_stream::InputStream;
+use gosub_engine::html5_parser::token_states::{State as TokenState};
+use gosub_engine::html5_parser::tokenizer::{Options, Tokenizer};
+use gosub_engine::html5_parser::token::{Token, TokenTrait, TokenType};
+
+extern crate regex;
+use regex::Regex;
+
+#[macro_use]
+extern crate serde_derive;
+
+// These tests are skipped for various reasons. See test_results.md
+const SKIP_TESTS: [&str; 1] = [
+    "<!DOCTYPE a PUBLIC'\\uDBC0\\uDC00",
+    // Entries are compared verbatim against a test's `description` (see `run_token_test`).
+];
+
+#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Root {
+    pub tests: Vec<Test>,   // All test cases deserialized from one suite file
+}
+
+#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Test {
+    pub description: String,            // Printed for every test; also compared against SKIP_TESTS
+    pub input: String,                  // Text that is fed to the tokenizer
+    pub output: Vec<Vec<Value>>,        // Expected tokens; element 0 of each entry names the token type
+    #[serde(default)]
+    pub errors: Vec<Error>,             // Expected parse errors (empty when the key is absent)
+    #[serde(default)]
+    pub double_escaped: Option<bool>,   // When true, input and expected comment/text/end-tag data go through `escape`
+    #[serde(default)]
+    pub initial_states: Vec<String>,    // States to start the tokenizer in; empty means "Data state"
+    pub last_start_tag: Option<String>, // Handed to the tokenizer through `Options::last_start_tag`
+}
+
+#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Error {
+    pub code: String,   // Compared with the `message` of the parse errors the tokenizer reports
+    pub line: i64,      // Line at which the error is expected
+    pub col: i64,       // Column at which the error is expected
+}
+
+pub struct TestResults{
+    tests: usize,               // Number of tests that were run (skipped tests are not counted)
+    assertions: usize,          // Number of assertions (different combinations of input/output per test)
+    succeeded: usize,           // How many succeeded assertions
+    failed: usize,              // How many failed assertions
+    failed_position: usize,     // Failed assertions where the error code matched but its position did not
+}
+
+/// Runs every `*.test` suite found in `<dir>/tokenizer` and prints a summary of the results.
+/// `<dir>` is the first command-line argument and defaults to `./html5lib-tests`.
+fn main () -> io::Result<()> {
+    let default_dir = "./html5lib-tests";
+    let dir = env::args().nth(1).unwrap_or_else(|| default_dir.to_string());
+
+    let mut results = TestResults{
+        tests: 0,
+        assertions: 0,
+        succeeded: 0,
+        failed: 0,
+        failed_position: 0,
+    };
+
+    for entry in fs::read_dir(dir + "/tokenizer")? {
+        let path = entry?.path();
+
+        // `extension()` is `None` for names like `README`; skip those instead of panicking.
+        if !path.is_file() || path.extension().map_or(true, |ext| ext != "test") {
+            continue;
+        }
+
+        let contents = fs::read_to_string(&path)?;
+        let container: Root = match serde_json::from_str(&contents) {
+            Ok(container) => container,
+            Err(err) => { eprintln!("⚠️ Skipping {:?}: {}", path, err); continue; }
+        };
+
+        println!("🏃‍♂️ Running {} tests from 🗄️ {:?}", container.tests.len(), path);
+        for test in container.tests {
+            run_token_test(&test, &mut results)
+        }
+    }
+
+    println!("🏁 Tests completed: Ran {} tests, {} assertions, {} succeeded, {} failed ({} position failures)", results.tests, results.assertions, results.succeeded, results.failed, results.failed_position);
+    Ok(())
+}
+
+/// Runs one suite entry through the tokenizer, once per requested initial state, and tallies
+/// every token and parse-error comparison in `results`. Entries named in `SKIP_TESTS` are skipped.
+fn run_token_test(test: &Test, results: &mut TestResults)
+{
+    if SKIP_TESTS.contains(&test.description.as_str()) {
+        println!("🧪 Skipping test: {}", test.description);
+        return;
+    }
+
+    println!("🧪 Running test: {}", test.description);
+
+    results.tests += 1;
+
+    // If no initial state is given, assume Data state
+    let mut states = test.initial_states.clone();
+    if states.is_empty() {
+        states.push(String::from("Data state"));
+    }
+
+    // The input is the same for every initial state, so unescape it only once.
+    let double_escaped = test.double_escaped.unwrap_or(false);
+    let input = if double_escaped {
+        escape(test.input.as_str())
+    } else {
+        test.input.to_string()
+    };
+
+    for state in states.iter() {
+        let state = match state.as_str() {
+            "PLAINTEXT state" => TokenState::PlaintextState,
+            "RAWTEXT state" => TokenState::RawTextState,
+            "RCDATA state" => TokenState::RcDataState,
+            "Script data state" => TokenState::ScriptDataState,
+            "CDATA section state" => TokenState::CDataSectionState,
+            "Data state" => TokenState::DataState,
+            _ => panic!("unknown state found in test: {} ", state)
+        };
+
+        let mut is = InputStream::new();
+        is.read_from_str(input.as_str(), None);
+        let mut tokenizer = Tokenizer::new(&mut is, Some(Options{
+            initial_state: state,
+            last_start_tag: test.last_start_tag.clone().unwrap_or_default(),
+        }));
+
+        // If there is no output, still do an (initial) next token so the parser can generate
+        // errors.
+        if test.output.is_empty() {
+            tokenizer.next_token();
+        }
+
+        // There can be multiple tokens to match. Make sure we match all of them. Every
+        // comparison is one assertion, so matching tokens are counted as successes too
+        // (previously only mismatches were tallied, which skewed the final summary).
+        for expected_token in test.output.iter() {
+            let t = tokenizer.next_token();
+            results.assertions += 1;
+            if match_token(t, expected_token, double_escaped) {
+                results.succeeded += 1;
+            } else {
+                results.failed += 1;
+            }
+        }
+
+        // A different number of parse errors than the suite lists is a failure on its own.
+        if tokenizer.errors.len() != test.errors.len() {
+            println!("❌ Unexpected errors found (wanted {}, got {}): ", test.errors.len(), tokenizer.errors.len());
+            for want_err in &test.errors {
+                println!("     * Want: '{}' at {}:{}", want_err.code, want_err.line, want_err.col);
+            }
+            for got_err in tokenizer.get_errors() {
+                println!("     * Got: '{}' at {}:{}", got_err.message, got_err.line, got_err.col);
+            }
+            results.assertions += 1;
+            results.failed += 1;
+        }
+
+        // Check error messages
+        for error in &test.errors {
+            results.assertions += 1;
+            match match_error(&tokenizer, error) {
+                ErrorResult::Success => results.succeeded += 1,
+                ErrorResult::Failure => results.failed += 1,
+                ErrorResult::PositionFailure => {
+                    results.failed += 1;
+                    results.failed_position += 1;
+                }
+            }
+        }
+    }
+
+    println!("----------------------------------------");
+}
+
+#[derive(PartialEq)]
+enum ErrorResult {
+    Success,            // Found the expected error at the expected line and column
+    Failure,            // Didn't find the error (not even at an incorrect position)
+    PositionFailure,    // Found the error, but at an incorrect position
+}
+
+/// Looks up `expected_err` among the parse errors the tokenizer has collected.
+///
+/// Yields `Success` for an exact code/line/column match, `PositionFailure` when only the code
+/// was found, and `Failure` when the code is missing altogether.
+fn match_error(tokenizer: &Tokenizer, expected_err: &Error) -> ErrorResult {
+    // First pass: an error with the same code, line and column.
+    let exact = tokenizer.get_errors().into_iter().find(|got| {
+        got.message == expected_err.code
+            && got.line as i64 == expected_err.line
+            && got.col as i64 == expected_err.col
+    });
+    if let Some(got) = exact {
+        println!("✅ Found parse error '{}' at {}:{}", got.message, got.line, got.col);
+        return ErrorResult::Success;
+    }
+
+    // Second pass: no exact match exists, so any error with the same code necessarily sits on
+    // another line/column (it might just be an off-by-one position).
+    let same_code = tokenizer.get_errors().into_iter()
+        .any(|got| got.message == expected_err.code);
+
+    println!("❌ Expected error '{}' at {}:{}", expected_err.code, expected_err.line, expected_err.col);
+
+    println!("   Parser errors generated:");
+    for got in tokenizer.get_errors() {
+        println!("     * '{}' at {}:{}", got.message, got.line, got.col);
+    }
+
+    if same_code {
+        ErrorResult::PositionFailure
+    } else {
+        ErrorResult::Failure
+    }
+}
+
+/// Compares a token emitted by the tokenizer with one expected-output entry of a suite.
+///
+/// `expected[0]` names the token type and the remaining elements are verified by the matching
+/// `check_match_*` helper. Returns `true` only when the type and all details agree. Panics
+/// when the entry has no string type name or names a token type this runner does not know.
+fn match_token(have: Token, expected: &[Value], double_escaped: bool) -> bool {
+    let type_name = expected.get(0).unwrap().as_str().unwrap();
+    let expected_token_type = match type_name {
+        "DOCTYPE" => TokenType::DocTypeToken,
+        "StartTag" => TokenType::StartTagToken,
+        "EndTag" => TokenType::EndTagToken,
+        "Comment" => TokenType::CommentToken,
+        "Character" => TokenType::TextToken,
+        _ => panic!("unknown output token type {:?}", type_name)
+    };
+
+    let have_type = have.type_of();
+    if have_type != expected_token_type {
+        println!("❌ Incorrect token type found (want: {:?}, got {:?})", expected_token_type, have_type);
+        return false;
+    }
+
+    // Every helper prints its own mismatch report, so only the verdict is needed here.
+    let verdict = match have {
+        Token::DocTypeToken{name, force_quirks, pub_identifier, sys_identifier} => {
+            check_match_doctype(expected, name, force_quirks, pub_identifier, sys_identifier)
+        }
+        Token::StartTagToken{name, attributes, is_self_closing} => {
+            check_match_starttag(expected, name, attributes, is_self_closing)
+        }
+        Token::EndTagToken{name} => {
+            check_match_endtag(expected, name, double_escaped)
+        }
+        Token::CommentToken{value} => {
+            check_match_comment(expected, value, double_escaped)
+        }
+        Token::TextToken{value} => {
+            check_match_text(expected, value, double_escaped)
+        }
+        Token::EofToken => {
+            println!("❌ EOF token");
+            Err(())
+        }
+    };
+
+    if verdict.is_err() {
+        return false;
+    }
+
+    println!("✅ Test passed");
+    true
+}
+
+/// Checks a start tag token against a `["StartTag", name, {attributes}, self_closing]` entry.
+///
+/// The attribute object and the self-closing flag are optional; an absent flag is treated as
+/// `false`. Prints the reason and returns `Err(())` on the first mismatch.
+fn check_match_starttag(expected: &[Value], name: String, attributes: Vec<(String, String)>, is_self_closing: bool) -> Result<(), ()> {
+    let expected_name = expected.get(1).and_then(|v| v.as_str()).unwrap();
+    let expected_attrs = expected.get(2).and_then(|v| v.as_object());
+    let expected_self_closing = expected.get(3).and_then(|v| v.as_bool()).unwrap_or(false);
+
+    if expected_name != name {
+        println!("❌ Incorrect start tag (wanted: '{}', got '{}')", expected_name, name);
+        return Err(());
+    }
+
+    if expected_self_closing != is_self_closing {
+        println!("❌ Incorrect start tag (expected selfclosing: {})", expected_self_closing);
+        return Err(());
+    }
+
+    if expected_attrs.is_none() && attributes.is_empty() {
+        // No attributes to check
+        return Ok(());
+    }
+
+    // Convert the expected attr to Vec<(string, string)>
+    let expected_attrs: Vec<(String, String)> = expected_attrs.map_or(Vec::new(), |map| {
+        map.iter()
+            .filter_map(|(key, value)| value.as_str().map(|v| (key.clone(), v.to_string())))
+            .collect()
+    });
+
+    let set1: HashSet<_> = expected_attrs.iter().collect();
+    let set2: HashSet<_> = attributes.iter().collect();
+
+    if set1 != set2 {
+        println!("❌ Attributes mismatch");
+        for attr in expected_attrs {
+            println!("     * Want: '{}={}'", &attr.0, &attr.1);
+        }
+        for attr in attributes {
+            println!("     * Got: '{}={}'", attr.0, attr.1);
+        }
+        return Err(())
+    }
+
+    Ok(())
+}
+
+/// Verifies a comment token's data against `expected[1]`, which is passed through `escape`
+/// first for double-escaped suites. Prints both values and returns `Err(())` on a mismatch.
+fn check_match_comment(expected: &[Value], value: String, is_double_escaped: bool) -> Result<(), ()> {
+    let raw = expected.get(1).unwrap().as_str().unwrap();
+    let wanted = if is_double_escaped { escape(raw) } else { raw.to_string() };
+    if value == wanted {
+        return Ok(());
+    }
+    println!("❌ Incorrect text found in comment token");
+    println!("    wanted: '{}', got: '{}'", wanted, value.as_str());
+    Err(())
+}
+
+/// Verifies a character token's text against `expected[1]`, which is passed through `escape`
+/// first for double-escaped suites. Prints both values and returns `Err(())` on a mismatch.
+fn check_match_text(expected: &[Value], value: String, is_double_escaped: bool) -> Result<(), ()> {
+    let raw = expected.get(1).unwrap().as_str().unwrap();
+    let wanted = if is_double_escaped { escape(raw) } else { raw.to_string() };
+    if value == wanted {
+        return Ok(());
+    }
+    println!("❌ Incorrect text found in text token");
+    println!("    wanted: '{}', got: '{}'", wanted, value.as_str());
+    Err(())
+}
+
+/// Verifies an end tag's name against `expected[1]` (unescaped for double-escaped suites).
+fn check_match_endtag(expected: &[Value], name: String, is_double_escaped: bool) -> Result<(), ()> {
+    let raw = expected.get(1).unwrap().as_str().unwrap();
+    let wanted = if is_double_escaped { escape(raw) } else { raw.to_string() };
+    if name == wanted {
+        return Ok(());
+    }
+    println!("❌ Incorrect end tag");
+    Err(())
+}
+
+// Check if a given doctype matches the expected `["DOCTYPE", name, public_id, system_id,
+// correctness]` entry. `correctness` is `true` when the force-quirks flag must be off.
+fn check_match_doctype(
+    expected: &[Value],
+    name: Option<String>,
+    force_quirks: bool,
+    pub_identifier: Option<String>,
+    sys_identifier: Option<String>
+) -> Result<(), ()> {
+    let expected_name = expected.get(1).unwrap().as_str();
+    let expected_pub = expected.get(2).unwrap().as_str();
+    let expected_sys = expected.get(3).unwrap().as_str();
+    let expected_correct = expected.get(4).unwrap().as_bool();
+
+    let name_problem = match (expected_name, name.as_deref()) {
+        (None, Some(got)) => Some(format!("no name expected, but got '{}'", got)),
+        (Some(_), None) => Some(String::from("name expected, but got none")),
+        (Some(want), Some(got)) if want != got => Some(format!("wanted name: '{}', got: '{}'", want, got)),
+        _ => None,
+    };
+    if let Some(problem) = name_problem {
+        println!("❌ Incorrect doctype ({})", problem);
+        return Err(());
+    }
+    // `correctness` is the inverse of force-quirks: equal values mean the flag is wrong.
+    if expected_correct == Some(force_quirks) {
+        println!("❌ Incorrect doctype (wanted quirk: '{}')", !force_quirks);
+        return Err(());
+    }
+    if expected_pub != pub_identifier.as_deref() {
+        println!("❌ Incorrect doctype (wanted pub id: '{:?}', got '{:?}')", expected_pub, pub_identifier);
+        return Err(());
+    }
+    if expected_sys != sys_identifier.as_deref() {
+        println!("❌ Incorrect doctype (wanted sys id: '{:?}', got '{:?}')", expected_sys, sys_identifier);
+        return Err(());
+    }
+
+    Ok(())
+}
+
+/// Replaces every `\uXXXX` escape in `input` with the character it encodes.
+///
+/// Runs of escapes are decoded as UTF-16, so a surrogate pair yields one code point. A lone
+/// surrogate is not a valid `char` (`from_u32_unchecked` on it was UB) and becomes U+FFFD.
+fn escape(input: &str) -> String {
+    let re = Regex::new(r"(?:\\u[0-9a-fA-F]{4})+").unwrap();
+    re.replace_all(input, |caps: &regex::Captures| {
+        let units = caps[0].split("\\u").skip(1).map(|hex| u16::from_str_radix(hex, 16).unwrap());
+        char::decode_utf16(units).map(|unit| unit.unwrap_or(char::REPLACEMENT_CHARACTER)).collect::<String>()
+    }).into_owned()
+}
\ No newline at end of file
diff --git a/src/html5_parser/consume_char_refs.rs b/src/html5_parser/consume_char_refs.rs
index 50402bce0..d070d6ca7 100644
--- a/src/html5_parser/consume_char_refs.rs
+++ b/src/html5_parser/consume_char_refs.rs
@@ -1,285 +1,352 @@
+use crate::html5_parser::parse_errors::ParserError;
 use crate::html5_parser::token_named_characters::TOKEN_NAMED_CHARS;
 use crate::html5_parser::token_replacements::TOKEN_REPLACEMENTS;
 use crate::html5_parser::tokenizer::Tokenizer;
+use crate::html5_parser::input_stream::Element;
+use crate::read_char;
 
-use super::tokenizer::CHAR_REPLACEMENT;
-
-// All references are to chapters in https://dev.w3.org/html5/spec-LC/tokenization.html
-
-impl<'a> Tokenizer<'a> {
-    // Consumes a character reference and places this in the tokenizer consume buffer
-    // ref: 8.2.4.69 Tokenizing character references
-    pub fn consume_character_reference(
-        &mut self,
-        additional_allowed_char: Option<char>,
-        as_attribute: bool,
-    ) -> Option<String> {
-        // self.clear_consume_buffer();
-
-        if as_attribute {
-            // When we are inside an attribute context, things (will/might) be different. Not sure how yet.
-        }
+extern crate lazy_static;
+use lazy_static::lazy_static;
+use crate::html5_parser::input_stream::SeekMode::SeekCur;
 
-        let c = match self.stream.read_char() {
-            Some(c) => c,
-            None => {
-                return None;
-            }
-        };
-
-        // Characters that aren't allowed
-        let mut chars = vec![
-            crate::html5_parser::tokenizer::CHAR_TAB,
-            crate::html5_parser::tokenizer::CHAR_LF,
-            crate::html5_parser::tokenizer::CHAR_FF,
-            crate::html5_parser::tokenizer::CHAR_SPACE,
-            '<',
-            '&',
-        ];
-
-        // The name is weird: addiitonal_allowed_chars, but it would be a char that is NOT allowed (?)
-        if additional_allowed_char.is_some() {
-            chars.push(additional_allowed_char.unwrap())
-        }
+use super::tokenizer::CHAR_REPLACEMENT;
 
-        if chars.contains(&c) {
-            self.stream.unread();
-            return None;
-        }
+// States of the character-reference state machine that `consume_character_reference` runs
+pub enum CcrState {
+    CharacterReferenceState,                    // Initial state
+    NamedCharacterReferenceState,               // Entered when an ASCII letter or digit follows
+    AmbiguousAmpersandState,                    // Entered when no named entity could be found
+    NumericCharacterReferenceState,             // Entered after a '#'
+    HexadecimalCharacterReferenceStartState,    // Entered after "#x" / "#X"
+    DecimalCharacterReferenceStartState,        // Entered after '#' without an 'x' / 'X'
+    HexadecimalCharacterReferenceState,         // Entered once a hexadecimal digit was seen
+    DecimalCharacterReferenceState,             // Entered once a decimal digit was seen
+    NumericalCharacterReferenceEndState,        // NOTE(review): presumably resolves the collected code point - confirm
+}
 
-        // Consume a number when we found &#
-        if c == '#' {
-            self.consume('&');
-            self.consume(c);
-            if self.consume_number().is_err() {
-                self.stream.unread();
-                return None;
+macro_rules! consume_temp_buffer { // Flushes `temporary_buffer` to its destination, then empties it
+    ($self:expr, $as_attribute:expr) => {
+        for c in $self.temporary_buffer.clone() { // presumably cloned so `$self` can be mutated in the loop
+            if $as_attribute {
+                $self.current_attr_value.push(c); // attribute context: append to the attribute value
+            } else {
+                $self.consume(c); // otherwise hand the character to `consume`
             }
-
-            return Some(self.get_consumed_str());
-        }
-
-        // Consume anything else when we found & with another char after (ie: &raquo;)
-        self.stream.unread();
-        if self.consume_entity(as_attribute).is_err() {
-            self.stream.unread();
-            return None;
         }
+        $self.temporary_buffer.clear(); // everything has been flushed
+    };
+}
 
-        return Some(self.get_consumed_str());
-    }
-
-    // Consume a number like #x1234, #123 etc
-    fn consume_number(&mut self) -> Result<String, String> {
-        let mut str_num = String::new();
-
-        // Save current position for easy recovery
-        let cp = self.stream.tell();
+impl<'a> Tokenizer<'a> {
+    // Consumes a character reference and places this in the tokenizer consume buffer
+    // ref: 8.2.4.69 Tokenizing character references
 
-        // Is the char a 'X' or 'x', then we must try and fetch hex digits, otherwise just 0..9
-        let mut is_hex = false;
-        let hex = match self.stream.look_ahead(0) {
-            Some(hex) => hex,
-            None => {
-                return Err(String::new());
-            }
-        };
+    // @TODO: fix additional allowed char
+    pub fn consume_character_reference(&mut self, _additional_allowed_char: Option<Element>, as_attribute: bool)
+    {
+        let mut ccr_state = CcrState::CharacterReferenceState;
+        let mut char_ref_code: Option<u32> = Some(0);
 
-        if hex == 'x' || hex == 'X' {
-            is_hex = true;
+        loop {
+            match ccr_state {
+                CcrState::CharacterReferenceState => {
+                    self.temporary_buffer = vec!['&'];
+
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => {
+                        //     consume_temp_buffer!(self, as_attribute);
+                        //     return
+                        // },
+                        Element::Utf8('A'..='Z') | Element::Utf8('a'..='z') | Element::Utf8('0'..='9') => {
+                            self.stream.unread();
+                            ccr_state = CcrState::NamedCharacterReferenceState;
+                        },
+                        Element::Utf8('#') => {
+                            self.temporary_buffer.push(c.utf8());
+                            ccr_state = CcrState::NumericCharacterReferenceState;
+                        },
+                        _ => {
+                            consume_temp_buffer!(self, as_attribute);
+
+                            self.stream.unread();
+                            return;
+                        }
+                    }
+                },
+                CcrState::NamedCharacterReferenceState => {
+                    if let Some(entity) = self.find_entity() {
+
+                        self.stream.seek(SeekCur, entity.len() as isize);
+                        let c = self.stream.look_ahead(0);
+                        if
+                            as_attribute &&
+                            entity.chars().last().unwrap() != ';' &&
+                            c.is_utf8() &&
+                            (c.utf8() == '=' || c.utf8().is_ascii_alphanumeric())
+                        {
+                            // for historical reasons, the codepoints should be flushed as is
+                            for c in entity.chars() {
+                                self.temporary_buffer.push(c);
+                            }
+
+                            consume_temp_buffer!(self, as_attribute);
+                            return;
+                        }
+
+                        let entity_chars = *TOKEN_NAMED_CHARS.get(entity.as_str()).unwrap();
+
+                        // Flush codepoints consumed as character reference
+                        for c in entity_chars.chars() {
+                            if as_attribute {
+                                self.current_attr_value.push(c);
+                            } else {
+                                self.consume(c);
+                            }
+                        }
+                        self.temporary_buffer.clear();
+
+                        if entity.chars().last().unwrap() != ';' {
+                            // We need to return the position where we expected the ';'
+                            self.stream.read_char();    // @TODO: We can't use skip, as this might interfere with EOF stuff (fix it)
+                            self.parse_error(ParserError::MissingSemicolonAfterCharacterReference);
+                            self.stream.unread();
+                        }
+
+                        return;
+                    }
 
-            // Consume the 'x' character
-            let c = match self.stream.read_char() {
-                Some(c) => c,
-                None => {
-                    self.stream.seek(cp);
-                    return Err(String::new());
+                    consume_temp_buffer!(self, as_attribute);
+                    ccr_state = CcrState::AmbiguousAmpersandState;
                 }
-            };
-
-            self.consume(c);
-        };
-
-        let mut i = 0;
-        loop {
-            let c = match self.stream.read_char() {
-                Some(c) => c,
-                None => {
-                    self.stream.seek(cp);
-                    return Err(String::new());
+                CcrState::AmbiguousAmpersandState => {
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => return,
+                        Element::Utf8('A'..='Z') | Element::Utf8('a'..='z') | Element::Utf8('0'..='9') => {
+                            if as_attribute {
+                                self.current_attr_value.push(c.utf8());
+                            } else {
+                                self.consume(c.utf8());
+                            }
+                        },
+                        Element::Utf8(';') => {
+                            self.parse_error(ParserError::UnknownNamedCharacterReference);
+                            self.stream.unread();
+                            return;
+                        }
+                        _ => {
+                            self.stream.unread();
+                            return;
+                        }
+                    }
                 }
-            };
-
-            if is_hex && c.is_ascii_hexdigit() {
-                str_num.push(c);
-                self.consume(c);
-            } else if !is_hex && c.is_ascii_digit() {
-                str_num.push(c);
-                self.consume(c);
-            } else {
-                self.stream.unread();
-                break;
-            }
-
-            i += 1;
-        }
-
-        // Fetch next character
-        let c = match self.stream.read_char() {
-            Some(c) => c,
-            None => {
-                self.stream.seek(cp);
-                return Err(String::new());
-            }
-        };
-
-        // Next character MUST be ;
-        if c != ';' {
-            self.parse_error("expected a ';'");
-            self.stream.seek(cp);
-            return Err(String::new());
-        }
-
-        self.consume(c);
+                CcrState::NumericCharacterReferenceState => {
+                    char_ref_code = Some(0);
+
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState,
+                        Element::Utf8('X') | Element::Utf8('x') => {
+                            self.temporary_buffer.push(c.utf8());
+                            ccr_state = CcrState::HexadecimalCharacterReferenceStartState;
+                        }
+                        _ => {
+                            self.stream.unread();
+                            ccr_state = CcrState::DecimalCharacterReferenceStartState;
+                        }
+                    }
+                }
+                CcrState::HexadecimalCharacterReferenceStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState,
+                        Element::Utf8('0'..='9') | Element::Utf8('A'..='F') | Element::Utf8('a'..='f') => {
+                            self.stream.unread();
+                            ccr_state = CcrState::HexadecimalCharacterReferenceState
+                        }
+                        _ => {
+                            self.parse_error(ParserError::AbsenceOfDigitsInNumericCharacterReference);
+                            consume_temp_buffer!(self, as_attribute);
+
+                            self.stream.unread();
+                            return;
+                        }
+                    }
+                }
+                CcrState::DecimalCharacterReferenceStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('0'..='9') => {
+                            self.stream.unread();
+                            ccr_state = CcrState::DecimalCharacterReferenceState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::AbsenceOfDigitsInNumericCharacterReference);
+                            consume_temp_buffer!(self, as_attribute);
+
+                            self.stream.unread();
+                            return;
+                        }
+                    }
+                }
+                CcrState::HexadecimalCharacterReferenceState => {
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState,
+                        Element::Utf8('0'..='9') => {
+                            let i = c.utf8() as u32 - 0x30;
+                            if let Some(value) = char_ref_code {
+                                char_ref_code = value
+                                    .checked_mul(16)
+                                    .and_then(|mul_result| mul_result.checked_add(i));
+                            }
+                        }
+                        Element::Utf8('A'..='F') => {
+                            let i = c.utf8() as u32 - 0x37;
+                            if let Some(value) = char_ref_code {
+                                char_ref_code = value
+                                    .checked_mul(16)
+                                    .and_then(|mul_result| mul_result.checked_add(i));
+                            }
+                        }
+                        Element::Utf8('a'..='f') => {
+                            let i = c.utf8() as u32 - 0x57;
+                            if let Some(value) = char_ref_code {
+                                char_ref_code = value
+                                    .checked_mul(16)
+                                    .and_then(|mul_result| mul_result.checked_add(i));
+                            }
+                        }
+                        Element::Utf8(';') => {
+                            ccr_state = CcrState::NumericalCharacterReferenceEndState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingSemicolonAfterCharacterReference);
+                            self.stream.unread();
+                            ccr_state = CcrState::NumericalCharacterReferenceEndState;
+                        }
+                    }
+                }
+                CcrState::DecimalCharacterReferenceState => {
+                    let c = read_char!(self);
+                    match c {
+                        // Element::Eof => ccr_state = CcrState::NumericalCharacterReferenceEndState,
+                        Element::Utf8('0'..='9') => {
+                            let i = c.utf8() as u32 - 0x30;
+                            if let Some(value) = char_ref_code {
+                                char_ref_code = value
+                                    .checked_mul(10)
+                                    .and_then(|mul_result| mul_result.checked_add(i));
+                            }
+                        }
+                        Element::Utf8(';') => {
+                            ccr_state = CcrState::NumericalCharacterReferenceEndState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingSemicolonAfterCharacterReference);
+                            self.stream.unread();
+                            ccr_state = CcrState::NumericalCharacterReferenceEndState;
+                        }
+                    }
+                }
+                CcrState::NumericalCharacterReferenceEndState => {
+                    let overflow = char_ref_code.is_none();
+                    let mut char_ref_code = char_ref_code.unwrap_or(0);
+
+                    if char_ref_code == 0 && !overflow {
+                        self.stream.read_char();
+                        self.parse_error(ParserError::NullCharacterReference);
+                        char_ref_code = CHAR_REPLACEMENT as u32;
+                    }
 
-        // If we found ;. we need to check how many digits we have parsed. It needs to be at least 1,
-        if i == 0 {
-            self.parse_error("didn't expect #;");
-            self.stream.seek(cp);
-            return Err(String::new());
-        }
+                    if char_ref_code > 0x10FFFF || overflow {
+                        self.stream.read_char();
+                        self.parse_error(ParserError::CharacterReferenceOutsideUnicodeRange);
+                        self.stream.unread();
+                        char_ref_code = CHAR_REPLACEMENT as u32;
+                    }
 
-        // check if we need to replace the character. First convert the number to a uint, and use that
-        // to check if it exists in the replacements table.
-        let num = match u32::from_str_radix(&*str_num, if is_hex { 16 } else { 10 }) {
-            Ok(n) => n,
-            Err(_) => 0, // lets pretend that an invalid value is set to 0
-        };
-
-        if TOKEN_REPLACEMENTS.contains_key(&num) {
-            // self.clear_consume_buffer();
-            self.consume(*TOKEN_REPLACEMENTS.get(&num).unwrap());
-            return Ok(String::new());
-        }
+                    if self.is_surrogate(char_ref_code) {
+                        self.stream.read_char();
+                        self.parse_error(ParserError::SurrogateCharacterReference);
+                        self.stream.unread();
+                        char_ref_code = CHAR_REPLACEMENT as u32;
+                    }
+                    if self.is_noncharacter(char_ref_code) {
+                        self.stream.read_char();
+                        self.parse_error(ParserError::NoncharacterCharacterReference);
+                        self.stream.unread();
+                        // char_ref_code = CHAR_REPLACEMENT as u32;
+                    }
+                    if self.is_control_char(char_ref_code) || char_ref_code == 0x0D {
+                        self.stream.read_char();
+                        self.stream.read_char();
+                        self.parse_error(ParserError::ControlCharacterReference);
+                        // self.stream.unread();
+                        self.stream.unread();
+
+                        if TOKEN_REPLACEMENTS.contains_key(&char_ref_code) {
+                            char_ref_code = *TOKEN_REPLACEMENTS.get(&char_ref_code).unwrap() as u32;
+                        }
+                    }
 
-        // Next, check if we are in the 0xD800..0xDFFF or 0x10FFFF range, if so, replace
-        if (num > 0xD800 && num < 0xDFFF) || (num > 0x10FFFFF) {
-            self.parse_error("within reserved codepoint range, but replaced");
-            // self.clear_consume_buffer();
-            self.consume(crate::html5_parser::tokenizer::CHAR_REPLACEMENT);
-            return Ok(String::new());
-        }
+                    self.temporary_buffer = vec![char::from_u32(char_ref_code).unwrap_or(CHAR_REPLACEMENT)];
+                    consume_temp_buffer!(self, as_attribute);
 
-        // Check if it's in a reserved range, in that case, we ignore the data
-        if self.in_reserved_number_range(num) {
-            self.parse_error("within reserved codepoint range, ignored");
-            // self.clear_consume_buffer();
-            return Ok(String::new());
+                    return;
+                }
+            }
         }
-
-        // self.clear_consume_buffer();
-        self.consume(std::char::from_u32(num).unwrap_or(CHAR_REPLACEMENT));
-
-        return Ok(String::new());
     }
 
-    // Returns if the given codepoint number is in a reserved range (as defined in
-    // https://dev.w3.org/html5/spec-LC/tokenization.html#consume-a-character-reference)
-    fn in_reserved_number_range(&self, codepoint: u32) -> bool {
-        if (0x1..=0x0008).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || (0x007F..=0x009F).contains(&codepoint)
-            || (0xFDD0..=0xFDEF).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || (0x000E..=0x001F).contains(&codepoint)
-            || [
-                0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
-                0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
-                0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF,
-                0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF,
-            ]
-            .contains(&codepoint)
-        {
-            return true;
-        }
-
-        return false;
+    pub(crate) fn is_surrogate(&self, num: u32) -> bool
+    {
+        num >= 0xD800 && num <= 0xDFFF
     }
 
-    // This will consume an entity that does not start with &# (ie: &raquo; &#copy;)
-    fn consume_entity(&mut self, as_attribute: bool) -> Result<String, String> {
-        // Processing is based on the golang.org/x/net/html package
-
-        let mut capture = String::new();
-
-        loop {
-            let c = self.stream.read_char();
-            match c {
-                Some(c) => {
-                    capture.push(c);
-
-                    // If we captured [azAZ09], just continue the capture
-                    if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
-                        continue;
-                    }
+    pub(crate) fn is_noncharacter(&self, num: u32) -> bool
+    {
+        (0xFDD0..=0xFDEF).contains(&num) || [
+            0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF,
+            0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+            0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF,
+            0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF,
+        ].contains(&num)
+    }
 
-                    break;
-                }
-                None => {
-                    self.parse_error("unexpected end of stream");
-                    self.consume('&');
-                    self.consume_string(capture);
-                    return Ok(String::new());
-                }
-            }
+    pub(crate) fn is_control_char(&self, num: u32) -> bool
+    {
+        // White spaces are ok
+        if [0x0009, 0x000A, 0x000C, 0x000D, 0x0020].contains(&num) {
+            return false;
         }
 
-        // At this point, we have a consume buffer with the entity name in it. We need to check if it's a known entity
-
-        if capture.len() == 0 {
-            // If we found nohting (ie: &;)
-            self.parse_error("expected entity name");
-            return Err(String::new());
-
-        // } else if as_attribute {
-        // @TODO: implement this
-        // If we need to consume an entity as an attribute, we need to check if the next character is a valid
-        // attribute stuff
-        } else if TOKEN_NAMED_CHARS.contains_key(capture.as_str()) {
-            // If we found a known entity, we need to replace it
-
-            let entity = TOKEN_NAMED_CHARS.get(capture.as_str()).unwrap();
-            self.consume_string((*entity).to_string());
-            return Ok(String::new());
-        } else if !as_attribute {
-            // If we found some text, but it's not an entity. We decrease the text until we find something that matches an entity.
-            let mut max_len = capture.len();
-
-            // Largest entity is 6 chars. We don't need to check for more
-            if max_len > 6 {
-                max_len = 6;
-            }
+        return (0x0001..=0x001F).contains(&num) || (0x007F..=0x009F).contains(&num);
+    }
 
-            for j in (1..=max_len).rev() {
-                let substr: String = capture.chars().take(j).collect();
-                if TOKEN_NAMED_CHARS.contains_key(substr.as_str()) {
-                    let entity = TOKEN_NAMED_CHARS.get(substr.as_str()).unwrap();
-                    self.consume_string((*entity).to_string());
-                    self.consume_string(capture.chars().skip(j).collect());
-                    return Ok(String::new());
-                }
+    // Finds the longest entity name at the current position in the stream. Returns the matched
+    // name itself (not its replacement) OR None when no entity has been found.
+    fn find_entity(&mut self) -> Option<String> {
+        let s= self.stream.look_ahead_slice(*LONGEST_ENTITY_LENGTH);
+        for i in (0..=s.len()).rev() {
+            if TOKEN_NAMED_CHARS.contains_key(&s[0..i]) {
+                // Move forward with the number of chars matching
+                // self.stream.skip(i);
+                return Some(String::from(&s[0..i]));
             }
         }
-
-        self.consume('&');
-        self.consume_string(capture.to_string());
-        return Ok(String::new());
+        None
     }
 }
 
+lazy_static! {
+    // Length of the longest key in the TOKEN_NAMED_CHARS map (this could be a const actually)
+    static ref LONGEST_ENTITY_LENGTH: usize = {
+        TOKEN_NAMED_CHARS.keys().map(|key| key.len()).max().unwrap_or(0)
+    };
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -294,7 +361,7 @@ mod tests {
 
                     let mut is = InputStream::new();
                     is.read_from_str(input, None);
-                    let mut tok = Tokenizer::new(&mut is);
+                    let mut tok = Tokenizer::new(&mut is, None);
                     let t = tok.next_token();
                     assert_eq!(expected, t.to_string());
                 }
@@ -304,103 +371,103 @@ mod tests {
 
     entity_tests! {
         // Numbers
-        entity_0: ("&#10;", "str[\n]")
-        entity_1: ("&#0;", "str[�]")
-        entity_2: ("&#x0;", "str[�]")
-        entity_3: ("&#xdeadbeef;", "str[�]")     // replace with replacement char
-        entity_4: ("&#xd888;", "str[�]")         // replace with replacement char
-        entity_5: ("&#xbeef;", "str[뻯]")
-        entity_6: ("&#x10;", "str[]")                // reserved codepoint
-        entity_7: ("&#;", "str[&]")
-        entity_8: ("&;", "str[&;]")
-        entity_9: ("&", "str[&]")
-        entity_10: ("", "str[]")             // reserved codepoint
-        entity_11: ("&#x0008;", "str[]")             // reserved codepoint
-        entity_12: ("&#0008;", "str[]")              // reserved codepoint
-        entity_13: ("&#8;", "str[]")                 // reserved codepoint
-        entity_14: ("&#x0009;", "str[\t]")
-        entity_15: ("&#x007F;", "str[]")             // reserved codepoint
-        entity_16: ("&#xFDD0;", "str[]")             // reserved codepoint
+        entity_0: ("&#10;", "\n")
+        entity_1: ("&#0;", "�")
+        entity_2: ("&#x0;", "�")
+        entity_3: ("&#xdeadbeef;", "�")     // replace with replacement char
+        entity_4: ("&#xd888;", "�")         // replace with replacement char
+        entity_5: ("&#xbeef;", "뻯")
+        entity_6: ("&#x10;", "�")                // reserved codepoint
+        entity_7: ("&#;", "&#;")
+        entity_8: ("&;", "&;")
+        entity_9: ("&", "&")
+        entity_10: ("", "�")                // reserved codepoint
+        entity_11: ("&#x0008;", "�")             // reserved codepoint
+        entity_12: ("&#0008;", "�")              // reserved codepoint
+        entity_13: ("&#8;", "�")                 // reserved codepoint
+        entity_14: ("&#x0009;", "\t")
+        entity_15: ("&#x007F;", "�")             // reserved codepoint
+        entity_16: ("&#xFDD0;", "�")             // reserved codepoint
 
         // Entities
-        entity_100: ("&copy;", "str[©]")
-        entity_101: ("&copyThing;", "str[©Thing;]")
-        entity_102: ("&raquo;", "str[»]")
-        entity_103: ("&laquo;", "str[«]")
-        entity_104: ("&not;", "str[¬]")
-        entity_105: ("&notit;", "str[¬it;]")
-        entity_106: ("&notin;", "str[∉]")
-        entity_107: ("&fo", "str[&fo]")
-        entity_108: ("&xxx", "str[&xxx]")
-        entity_109: ("&copy", "str[&copy]")
-        entity_110: ("&copy ", "str[© ]")
-        entity_111: ("&copya", "str[&copya]")
-        entity_112: ("&copya;", "str[©a;]")
-        entity_113: ("&#169;", "str[©]")
-        entity_114: ("&copy&", "str[©&]")
-        entity_115: ("&copya ", "str[©a ]")
-        // entity_116: ("&#169X ", "str[&]")       // What should this be?
+        entity_100: ("&copy;", "©")
+        entity_101: ("&copyThing;", "©Thing;")
+        entity_102: ("&raquo;", "»")
+        entity_103: ("&laquo;", "«")
+        entity_104: ("&not;", "¬")
+        entity_105: ("&notit;", "¬it;")
+        entity_106: ("&notin;", "∉")
+        entity_107: ("&fo", "&fo")
+        entity_108: ("&xxx", "&xxx")
+        entity_109: ("&copy", "©")
+        entity_110: ("&copy ", "© ")
+        entity_111: ("&copya", "©a")
+        entity_112: ("&copya;", "©a;")
+        entity_113: ("&#169;", "©")
+        // entity_114: ("&copy&", "©&")
+        entity_115: ("&copya ", "©a ")
+        entity_116: ("&#169X ", "©X ")
 
 
         // ChatGPT generated tests
-        entity_200: ("&copy;", "str[©]")
-        entity_201: ("&copy ", "str[© ]")
-        entity_202: ("&#169;", "str[©]")
-        entity_203: ("&#xA9;", "str[©]")
-        entity_204: ("&lt;", "str[<]")
-        entity_205: ("&unknown;", "str[&unknown;]")
-        entity_206: ("&#60;", "str[<]")
-        entity_207: ("&#x3C;", "str[<]")
-        entity_208: ("&amp;", "str[&]")
-        entity_209: ("&euro;", "str[€]")
-        entity_210: ("&gt;", "str[>]")
-        entity_211: ("&reg;", "str[®]")
-        entity_212: ("&#174;", "str[®]")
-        entity_213: ("&#xAE;", "str[®]")
-        entity_214: ("&quot;", "str[\"]")
-        entity_215: ("&#34;", "str[\"]")
-        entity_216: ("&#x22;", "str[\"]")
-        entity_217: ("&apos;", "str[']")
-        entity_218: ("&#39;", "str[']")
-        entity_219: ("&#x27;", "str[']")
-        entity_220: ("&excl;", "str[!]")
-        entity_221: ("&#33;", "str[!]")
-        entity_222: ("&num;", "str[#]")
-        entity_223: ("&#35;", "str[#]")
-        entity_224: ("&dollar;", "str[$]")
-        entity_225: ("&#36;", "str[$]")
-        entity_226: ("&percnt;", "str[%]")
-        entity_227: ("&#37;", "str[%]")
-        entity_228: ("&ast;", "str[*]")
-        entity_229: ("&#42;", "str[*]")
-        entity_230: ("&plus;", "str[+]")
-        entity_231: ("&#43;", "str[+]")
-        entity_232: ("&comma;", "str[,]")
-        entity_233: ("&#44;", "str[,]")
-        entity_234: ("&minus;", "str[−]")
-        entity_235: ("&#45;", "str[-]")
-        entity_236: ("&period;", "str[.]")
-        entity_237: ("&#46;", "str[.]")
-        entity_238: ("&sol;", "str[/]")
-        entity_239: ("&#47;", "str[/]")
-        entity_240: ("&colon;", "str[:]")
-        entity_241: ("&#58;", "str[:]")
-        entity_242: ("&semi;", "str[;]")
-        entity_243: ("&#59;", "str[;]")
-        entity_244: ("&equals;", "str[=]")
-        entity_245: ("&#61;", "str[=]")
-        entity_246: ("&quest;", "str[?]")
-        entity_247: ("&#63;", "str[?]")
-        entity_248: ("&commat;", "str[@]")
-        entity_249: ("&#64;", "str[@]")
-        entity_250: ("&COPY;", "str[©]")
-        entity_251: ("&#128;", "str[€]")
-        entity_252: ("&#x9F;", "str[Ÿ]")
-        entity_253: ("&#31;", "str[]")
-        entity_254: ("&#0;", "str[�]")
-        entity_255: ("&#xD800;", "str[�]")
-        entity_256: ("&unknownchar;", "str[&unknownchar;]")
-        entity_257: ("&#9999999;", "str[�]")
-        entity_259: ("&#11;", "str[]")
+        entity_200: ("&copy;", "©")
+        entity_201: ("&copy ", "© ")
+        entity_202: ("&#169;", "©")
+        entity_203: ("&#xA9;", "©")
+        entity_204: ("&lt;", "<")
+        entity_205: ("&unknown;", "&unknown;")
+        entity_206: ("&#60;", "<")
+        entity_207: ("&#x3C;", "<")
+        entity_208: ("&amp;", "&")
+        entity_209: ("&euro;", "€")
+        entity_210: ("&gt;", ">")
+        entity_211: ("&reg;", "®")
+        entity_212: ("&#174;", "®")
+        entity_213: ("&#xAE;", "®")
+        entity_214: ("&quot;", "\"")
+        entity_215: ("&#34;", "\"")
+        entity_216: ("&#x22;", "\"")
+        entity_217: ("&apos;", "'")
+        entity_218: ("&#39;", "'")
+        entity_219: ("&#x27;", "'")
+        entity_220: ("&excl;", "!")
+        entity_221: ("&#33;", "!")
+        entity_222: ("&num;", "#")
+        entity_223: ("&#35;", "#")
+        entity_224: ("&dollar;", "$")
+        entity_225: ("&#36;", "$")
+        entity_226: ("&percnt;", "%")
+        entity_227: ("&#37;", "%")
+        entity_228: ("&ast;", "*")
+        entity_229: ("&#42;", "*")
+        entity_230: ("&plus;", "+")
+        entity_231: ("&#43;", "+")
+        entity_232: ("&comma;", ",")
+        entity_233: ("&#44;", ",")
+        entity_234: ("&minus;", "−")
+        entity_235: ("&#45;", "-")
+        entity_236: ("&period;", ".")
+        entity_237: ("&#46;", ".")
+        entity_238: ("&sol;", "/")
+        entity_239: ("&#47;", "/")
+        entity_240: ("&colon;", ":")
+        entity_241: ("&#58;", ":")
+        entity_242: ("&semi;", ";")
+        entity_243: ("&#59;", ";")
+        entity_244: ("&equals;", "=")
+        entity_245: ("&#61;", "=")
+        entity_246: ("&quest;", "?")
+        entity_247: ("&#63;", "?")
+        entity_248: ("&commat;", "@")
+        entity_249: ("&#64;", "@")
+        entity_250: ("&COPY;", "©")
+        entity_251: ("&#128;", "€")
+        entity_252: ("&#x9F;", "Ÿ")
+        entity_253: ("&#31;", "")
+        entity_254: ("&#0;", "�")
+        entity_255: ("&#xD800;", "�")
+        entity_256: ("&unknownchar;", "&unknownchar;")
+        entity_257: ("&#9999999;", "�")
+        entity_259: ("&#11;", "")
     }
-}
+}
\ No newline at end of file
diff --git a/src/html5_parser/input_stream.rs b/src/html5_parser/input_stream.rs
index c3eba7c0c..c15f04574 100644
--- a/src/html5_parser/input_stream.rs
+++ b/src/html5_parser/input_stream.rs
@@ -1,6 +1,7 @@
 use std::fs::File;
 use std::io;
 use std::io::Read;
+use crate::html5_parser::tokenizer::{CHAR_LF, CHAR_CR};
 
 // Encoding defines the way the buffer stream is read, as what defines a "character".
 #[derive(PartialEq)]
@@ -19,15 +20,87 @@ pub enum Confidence {
                // Irrelevant          // There is no content encoding for this stream
 }
 
+#[derive(PartialEq, Debug, Copy, Clone)]
+pub struct Position {
+    pub offset: usize,
+    pub line: usize,
+    pub col: usize,
+}
+
+#[derive(PartialEq, Debug, Copy, Clone)]
+pub enum Element {
+    Utf8(char),             // Standard UTF character
+    Surrogate(u16),         // Surrogate character (since they cannot be stored in <char>)
+    Eof,                    // End of stream
+}
+
+impl Element {
+    pub fn is_eof(&self) -> bool {
+        match self {
+            Element::Eof => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_utf8(&self) -> bool {
+        match self {
+            Element::Utf8(_) => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_surrogate(&self) -> bool {
+        match self {
+            Element::Surrogate(_) => true,
+            _ => false,
+        }
+    }
+
+    pub fn u32(&self) -> u32 {
+        match self {
+            Element::Utf8(c) => *c as u32,
+            Element::Surrogate(c) => *c as u32,
+            Element::Eof => 0,
+        }
+    }
+
+    pub fn utf8(&self) -> char {
+        match self {
+            Element::Utf8(c) => *c,
+            Element::Surrogate(..) => 0x0000 as char,
+            Element::Eof => 0x0000 as char,
+        }
+    }
+
+    pub fn to_string(&self) -> String {
+        match self {
+            Element::Utf8(ch) => ch.to_string(),
+            Element::Surrogate(surrogate) => format!("U+{:04X}", surrogate), // Or some other representation
+            Element::Eof => "EOF".to_string(), // Or an empty string
+        }
+    }
+}
+
 // HTML(5) input stream structure
 pub struct InputStream {
-    encoding: Encoding,                // Current encoding
-    pub(crate) confidence: Confidence, // How confident are we that this is the correct encoding?
-    current: usize,                    // Current offset of the reader
-    length: usize,                     // Length (in bytes) of the buffer
-    buffer: Vec<char>,                 // Reference to the actual buffer stream in characters
-    u8_buffer: Vec<u8>,                // Reference to the actual buffer stream in u8 bytes
-                                       // If all things are ok, both buffer and u8_buffer should refer to the same memory location
+    pub encoding: Encoding,             // Current encoding
+    pub confidence: Confidence,         // How confident are we that this is the correct encoding?
+
+    pub position: Position,             // Current position (offset, line and column)
+    pub length: usize,                  // Length (in chars) of the buffer
+    line_offsets: Vec<usize>,           // Offset at which each detected line starts
+
+    buffer: Vec<Element>,               // Reference to the actual buffer stream in characters
+    u8_buffer: Vec<u8>,                 // Reference to the actual buffer stream in u8 bytes
+                                        // If all things are ok, both buffer and u8_buffer should refer to the same memory location (?)
+
+    pub has_read_eof: bool,             // True when we just read an EOF
+}
+
+pub enum SeekMode {
+    SeekSet,       // Seek from the start of the stream
+    SeekCur,       // Seek from the current stream position
+    SeekEnd,       // Seek (backwards) from the end of the stream
 }
 
 impl InputStream {
@@ -36,10 +109,16 @@ impl InputStream {
         InputStream {
             encoding: Encoding::UTF8,
             confidence: Confidence::Tentative,
-            current: 0,
+            position: Position{
+                offset: 0,
+                line: 1,
+                col: 1,
+            },
             length: 0,
+            line_offsets: vec![0],      // first line always starts at 0
             buffer: Vec::new(),
             u8_buffer: Vec::new(),
+            has_read_eof: false,
         }
     }
 
@@ -55,25 +134,90 @@ impl InputStream {
 
     // Returns true when the stream pointer is at the end of the stream
     pub fn eof(&self) -> bool {
-        self.current >= self.length
+        self.has_read_eof || self.position.offset as usize >= self.length
     }
 
     // Reset the stream reader back to the start
     pub fn reset(&mut self) {
-        self.current = 0
+        self.position.offset = 0;
+        self.position.line = 1;
+        self.position.col = 1;
     }
 
     // Seek explicit offset in the stream (based on chars)
-    pub fn seek(&mut self, mut off: usize) {
-        if off > self.length {
-            off = self.length
+    pub fn seek(&mut self, mode: SeekMode, offset: isize) {
+        let abs_offset = match mode {
+            SeekMode::SeekSet => {
+                if offset.is_negative() {
+                    0
+                } else {
+                    offset as usize
+                }
+            }
+            SeekMode::SeekCur => {
+                if offset.is_negative() {
+                    self.position.offset - offset.abs() as usize
+                } else {
+                    self.position.offset + offset as usize
+                }
+            }
+            SeekMode::SeekEnd => {
+                // Both -5 and 5 on seek-end do the same thing
+                if offset.abs() > self.length as isize {
+                    0
+                } else {
+                    self.length - offset.abs() as usize
+                }
+            }
+        };
+
+        self.position = self.generate_position(abs_offset);
+    }
+
+    pub fn get_previous_position(&mut self) -> Position {
+
+        // If we are at the beginning of the stream or have read EOF, we just return the current position
+        if self.position.offset == 0 || self.has_read_eof {
+            return self.position;
         }
 
-        self.current = off
+        self.generate_position(self.position.offset - 1)
+    }
+
+    // Generate a new position structure for given offset
+    fn generate_position(&mut self, abs_offset: usize) -> Position {
+        let mut abs_offset = abs_offset;
+
+        // Cap to length if we read past the end of the stream
+        if abs_offset > self.length + 1  {
+            abs_offset = self.length;
+            self.has_read_eof = true;
+        }
+
+        // Detect lines (if needed)
+        self.read_line_endings_until(abs_offset);
+
+        let mut last_line: usize = 0;
+        let mut last_offset = self.line_offsets[last_line];
+        for i in 0..self.line_offsets.len() {
+            if self.line_offsets[i] > abs_offset as usize {
+                break;
+            }
+
+            last_line = i;
+            last_offset = self.line_offsets[last_line];
+        }
+
+        // Set position values
+        return Position{
+            offset: abs_offset,
+            line: last_line + 1,
+            col: abs_offset - last_offset + 1,
+        }
     }
 
     pub fn tell(&self) -> usize {
-        self.current
+        self.position.offset as usize
     }
 
     // Set the given confidence of the input stream encoding
@@ -96,20 +240,39 @@ impl InputStream {
     pub fn force_set_encoding(&mut self, e: Encoding) {
         match e {
             Encoding::UTF8 => {
-                // Convert the u8 buffer into utf8 string
-                let str_buf = std::str::from_utf8(&self.u8_buffer).unwrap();
+                let str_buf;
+                unsafe {
+                    str_buf = std::str::from_utf8_unchecked(&self.u8_buffer)
+                        .replace("\u{000D}\u{000A}", "\u{000A}")
+                        .replace("\u{000D}", "\u{000A}");
+                }
 
                 // Convert the utf8 string into characters so we can use easy indexing
-                self.buffer = str_buf.chars().collect();
+                self.buffer = vec![];
+                for c in str_buf.chars() {
+
+                    // // Check if we have a non-bmp character. This means it's above 0x10000
+                    // let cp = c as u32;
+                    // if cp > 0x10000 && cp <= 0x10FFFF {
+                    //     let adjusted = cp - 0x10000;
+                    //     let lead = ((adjusted >> 10) & 0x3FF) as u16 + 0xD800;
+                    //     let trail = (adjusted & 0x3FF) as u16 + 0xDC00;
+                    //     self.buffer.push(Element::Surrogate(lead));
+                    //     self.buffer.push(Element::Surrogate(trail));
+                    //     continue;
+                    // }
+
+                    if (0xD800..=0xDFFF).contains(&(c as u32)) {
+                        self.buffer.push(Element::Surrogate(c as u16));
+                    } else {
+                        self.buffer.push(Element::Utf8(c));
+                    }
+                }
                 self.length = self.buffer.len();
             }
             Encoding::ASCII => {
                 // Convert the string into characters so we can use easy indexing. Any non-ascii chars (> 0x7F) are converted to '?'
-                self.buffer = self
-                    .u8_buffer
-                    .iter()
-                    .map(|&byte| if byte.is_ascii() { byte as char } else { '?' })
-                    .collect();
+                self.buffer = self.normalize_newlines_and_ascii(&self.u8_buffer);
                 self.length = self.buffer.len();
             }
         }
@@ -117,12 +280,34 @@ impl InputStream {
         self.encoding = e;
     }
 
+    fn normalize_newlines_and_ascii(&self, buffer: &Vec<u8>) -> Vec<Element> {
+        let mut result = Vec::with_capacity(buffer.len());
+
+        for i in 0..buffer.len() {
+            if buffer[i] == CHAR_CR as u8 {
+                // convert CR to LF, or CRLF to LF
+                if i + 1 < buffer.len() && buffer[i + 1] == CHAR_LF as u8 {
+                    continue;
+                }
+                result.push(Element::Utf8(CHAR_LF));
+            } else if buffer[i] >= 0x80 {
+                // Convert high ascii to ?
+                result.push(Element::Utf8('?'));
+            } else {
+                // everything else is ok
+                result.push(Element::Utf8(buffer[i] as char))
+            }
+        }
+
+        return result
+    }
+
     // Populates the current buffer with the contents of given file f
     pub fn read_from_file(&mut self, mut f: File, e: Option<Encoding>) -> io::Result<()> {
         // First we read the u8 bytes into a buffer
         f.read_to_end(&mut self.u8_buffer).expect("uh oh");
         self.force_set_encoding(e.unwrap_or(Encoding::UTF8));
-        self.current = 0;
+        self.reset();
         Ok(())
     }
 
@@ -130,49 +315,93 @@ impl InputStream {
     pub fn read_from_str(&mut self, s: &str, e: Option<Encoding>) {
         self.u8_buffer = Vec::from(s.as_bytes());
         self.force_set_encoding(e.unwrap_or(Encoding::UTF8));
-        self.current = 0;
+        self.reset();
     }
 
     // Returns the number of characters left in the buffer
     pub(crate) fn chars_left(&self) -> usize {
-        self.length - self.current
+        self.length - self.position.offset
     }
 
-    // Reads a character and increases the current pointer
-    pub(crate) fn read_char(&mut self) -> Option<char> {
-        if self.eof() {
-            return None;
+    // Reads a character and increases the current pointer, or returns Element::Eof at the end of the stream
+    pub(crate) fn read_char(&mut self) -> Element {
+        // Return Eof if we have already read EOF
+        if self.has_read_eof {
+            return Element::Eof;
         }
 
-        let c = self.buffer[self.current];
-        self.current += 1;
-
-        return Some(c);
+        // If we still can move forward in the stream, move forwards
+        if self.position.offset < self.length {
+            let c = self.buffer[self.position.offset].clone();
+            self.seek(SeekMode::SeekCur, 1);
+            return c;
+        } else {
+            // otherwise, we have reached the end of the stream
+            self.has_read_eof = true;
+
+            self.seek(SeekMode::SeekEnd, 0);
+
+            // // This is a kind of dummy position so the end of the files are read correctly.
+            // self.position = Position{
+            //     offset: self.position.offset,
+            //     line: self.position.line,
+            //     col: self.position.col,
+            // };
+
+            return Element::Eof;
+        }
     }
 
     pub(crate) fn unread(&mut self) {
-        if self.current > 1 {
-            self.current -= 1;
+        // We already read eof, so "unread" the eof by unsetting the flag
+        if self.has_read_eof {
+            self.has_read_eof = false;
+            return;
+        }
+
+        // If we can track back from the offset, we can do so
+        if self.position.offset > 0 {
+            self.seek(SeekMode::SeekCur, -1);
         }
     }
 
+    // Looks ahead in the stream and returns up to len characters (fewer if the stream ends first)
+    pub(crate) fn look_ahead_slice(&self, len: usize) -> String {
+        let end_pos = std::cmp::min(self.length, self.position.offset + len);
+
+        let slice = &self.buffer[self.position.offset as usize..end_pos];
+        slice.iter().map(|e| e.to_string()).collect()
+    }
+
     // Looks ahead in the stream, can use an optional index if we want to seek further
     // (or back) in the stream.
-    // @TODO: idx can be pos or neg. But self.current is always positive. This clashes.
-    pub(crate) fn look_ahead(&self, idx: i32) -> Option<char> {
-        let c = self.current as i32;
-
+    pub(crate) fn look_ahead(&self, offset: usize) -> Element {
         // Trying to look after the stream
-        if c + idx > self.length as i32 {
-            return None;
+        if self.position.offset + offset >= self.length {
+            return Element::Eof;
         }
 
-        // Trying to look before the stream
-        if c + idx < 0 {
-            return None;
-        }
+        self.buffer[self.position.offset + offset]
+    }
+
+    // Populates the line endings
+    fn read_line_endings_until(&mut self, abs_offset: usize) {
+        let mut last_offset = *self.line_offsets.last().unwrap();
+
+        while last_offset <= abs_offset as usize {
+            if last_offset >= self.length {
+                self.line_offsets.push(last_offset + 1);
+                break;
+            }
+
+            // Check the next char to see if it's a '\n'
+            let c = self.buffer[last_offset].clone();
+            if c == Element::Utf8('\n') {
+                self.line_offsets.push(last_offset + 1);
+            }
 
-        Some(self.buffer[(c + idx) as usize])
+            last_offset += 1;
+        }
     }
 }
 
@@ -194,36 +423,54 @@ mod test {
         assert_eq!(is.length, 3);
         assert_eq!(is.eof(), false);
         assert_eq!(is.chars_left(), 3);
-        assert_eq!(is.read_char().unwrap(), 'f');
+        assert_eq!(is.read_char().utf8(), 'f');
         assert_eq!(is.chars_left(), 2);
         assert_eq!(is.eof(), false);
-        assert_eq!(is.read_char().unwrap(), '👽');
+        assert_eq!(is.read_char().utf8(), '👽');
         assert_eq!(is.eof(), false);
         assert_eq!(is.chars_left(), 1);
-        assert_eq!(is.read_char().unwrap(), 'f');
+        assert_eq!(is.read_char().utf8(), 'f');
         assert_eq!(is.eof(), true);
         assert_eq!(is.chars_left(), 0);
 
         is.reset();
         is.set_encoding(Encoding::ASCII);
         assert_eq!(is.length, 6);
-        assert_eq!(is.read_char().unwrap(), 'f');
-        assert_eq!(is.read_char().unwrap(), '?');
-        assert_eq!(is.read_char().unwrap(), '?');
-        assert_eq!(is.read_char().unwrap(), '?');
-        assert_eq!(is.read_char().unwrap(), '?');
-        assert_eq!(is.read_char().unwrap(), 'f');
-        assert_eq!(is.read_char(), None);
-
-        is.unread();
-        assert_eq!(is.chars_left(), 1);
-        is.unread();
+        assert_eq!(is.read_char().utf8(), 'f');
+        assert_eq!(is.read_char().utf8(), '?');
+        assert_eq!(is.read_char().utf8(), '?');
+        assert_eq!(is.read_char().utf8(), '?');
+        assert_eq!(is.read_char().utf8(), '?');
+        assert_eq!(is.read_char().utf8(), 'f');
+        assert_eq!(is.read_char().is_eof(), true);
+
+        is.unread();    // unread eof
+        is.unread();    // unread 'f'
+        is.unread();    // Unread '?'
         assert_eq!(is.chars_left(), 2);
+        is.unread();
+        assert_eq!(is.chars_left(), 3);
 
         is.reset();
         assert_eq!(is.chars_left(), 6);
         is.unread();
         assert_eq!(is.chars_left(), 6);
+
+
+        is.read_from_str("abc", Some(Encoding::UTF8));
+        is.reset();
+        assert_eq!(is.read_char().utf8(), 'a');
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'a');
+        assert_eq!(is.read_char().utf8(), 'b');
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'b');
+        assert_eq!(is.read_char().utf8(), 'c');
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'c');
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        assert_eq!(is.read_char().is_eof(), true);
     }
 
     #[test]
@@ -238,34 +485,178 @@ mod test {
         assert_eq!(is.is_certain_encoding(), false);
     }
 
+    #[test]
+    fn test_offsets() {
+        let mut is = InputStream::new();
+        is.read_from_str("abc", Some(Encoding::UTF8));
+        assert_eq!(is.position, Position{ offset: 0, line: 1, col: 1});
+        assert_eq!('a', is.read_char().utf8());
+        assert_eq!(is.position, Position{ offset: 1, line: 1, col: 2});
+        assert_eq!('b', is.read_char().utf8());
+        assert_eq!(is.position, Position{ offset: 2, line: 1, col: 3});
+        assert_eq!('c', is.read_char().utf8());
+        assert_eq!(is.position, Position{ offset: 3, line: 1, col: 4});
+        assert_eq!(is.read_char().is_eof(), true);
+        assert_eq!(is.position, Position{ offset: 3, line: 1, col: 4});
+        assert_eq!(is.read_char().is_eof(), true);
+        assert_eq!(is.position, Position{ offset: 3, line: 1, col: 4});
+
+
+        let mut is = InputStream::new();
+        is.read_from_str("abc\ndefg\n\nhi\njk\nlmno\n\n\npqrst\nu\nv\nw\n\nxy\nz", Some(Encoding::UTF8));
+        assert_eq!(is.length, 40);
+
+        is.seek(SeekMode::SeekSet, 0);
+        assert_eq!(is.position, Position{ offset: 0, line: 1, col: 1});
+        let c = is.read_char();
+        assert_eq!('a', c.utf8());
+        assert_eq!(is.position, Position{ offset: 1, line: 1, col: 2});
+
+        is.seek(SeekMode::SeekSet, 7);
+        assert_eq!(is.position, Position{ offset: 7, line: 2, col: 4});
+        assert_eq!(is.chars_left(), 33);
+
+        let c = is.read_char();
+        assert_eq!('g', c.utf8());
+        assert_eq!(is.position, Position{ offset: 8, line: 2, col: 5});
+
+        let c = is.read_char();
+        assert_eq!('\n', c.utf8());
+        assert_eq!(is.position, Position{ offset: 9, line: 3, col: 1});
+
+        let c = is.read_char();
+        assert_eq!('\n', c.utf8());
+        assert_eq!(is.position, Position{ offset: 10, line: 4, col: 1});
+
+        let c = is.read_char();
+        assert_eq!('h', c.utf8());
+        assert_eq!(is.position, Position{ offset: 11, line: 4, col: 2});
+        assert_eq!(is.chars_left(), 29);
+
+        is.reset();
+        assert_eq!(is.position, Position{ offset: 0, line: 1, col: 1});
+        assert_eq!(is.chars_left(), 40);
+
+        is.seek(SeekMode::SeekSet, 100);
+        assert_eq!(is.position, Position{ offset: 40, line: 15, col: 2});
+        assert_eq!(is.chars_left(), 0);
+    }
+
     #[test]
     fn test_seek() {
         let mut is = InputStream::new();
         is.read_from_str("ab👽cd", Some(Encoding::UTF8));
         assert_eq!(is.length, 5);
         assert_eq!(is.chars_left(), 5);
-        assert_eq!(is.read_char().unwrap(), 'a');
-        assert_eq!(is.read_char().unwrap(), 'b');
+        assert_eq!(is.read_char().utf8(), 'a');
+        assert_eq!(is.read_char().utf8(), 'b');
         assert_eq!(is.chars_left(), 3);
-        is.seek(0);
+        is.seek(SeekMode::SeekSet, 0);
         assert_eq!(is.chars_left(), 5);
-        assert_eq!(is.read_char().unwrap(), 'a');
-        assert_eq!(is.read_char().unwrap(), 'b');
+        assert_eq!(is.read_char().utf8(), 'a');
+        assert_eq!(is.read_char().utf8(), 'b');
         assert_eq!(is.chars_left(), 3);
-        is.seek(3);
+        is.seek(SeekMode::SeekSet, 3);
         assert_eq!(is.chars_left(), 2);
-        assert_eq!(is.read_char().unwrap(), 'c');
-        assert_eq!(is.read_char().unwrap(), 'd');
+        assert_eq!(is.read_char().utf8(), 'c');
+        assert_eq!(is.read_char().utf8(), 'd');
         assert_eq!(is.chars_left(), 0);
         assert_eq!(is.eof(), true);
 
         is.reset();
-        assert_eq!(is.look_ahead(0).unwrap(), 'a');
-        assert_eq!(is.look_ahead(3).unwrap(), 'c');
-        assert_eq!(is.look_ahead(1).unwrap(), 'b');
-        assert_eq!(is.look_ahead(100), None);
-        assert_eq!(is.look_ahead(-1), None);
-        is.seek(4);
-        assert_eq!(is.look_ahead(-1).unwrap(), 'c');
+        assert_eq!(is.look_ahead(0).utf8(), 'a');
+        assert_eq!(is.look_ahead(3).utf8(), 'c');
+        assert_eq!(is.look_ahead(1).utf8(), 'b');
+        assert_eq!(is.look_ahead(100).is_eof(), true);
+
+        is.seek(SeekMode::SeekSet, 0);
+        assert_eq!(is.look_ahead_slice(1), "a");
+        assert_eq!(is.look_ahead_slice(2), "ab");
+        assert_eq!(is.look_ahead_slice(3), "ab👽");
+        assert_eq!(is.look_ahead_slice(4), "ab👽c");
+        assert_eq!(is.look_ahead_slice(5), "ab👽cd");
+        assert_eq!(is.look_ahead_slice(6), "ab👽cd");
+        assert_eq!(is.look_ahead_slice(100), "ab👽cd");
+
+        is.seek(SeekMode::SeekSet, 3);
+        assert_eq!(is.look_ahead_slice(1), "c");
+        assert_eq!(is.look_ahead_slice(2), "cd");
+
+
+        is.seek(SeekMode::SeekSet, 0);
+        assert_eq!(is.position.offset, 0);
+
+        is.seek(SeekMode::SeekSet, 3);
+        assert_eq!(is.position.offset, 3);
+
+        is.seek(SeekMode::SeekCur, 0);
+        assert_eq!(is.position.offset, 3);
+
+        is.seek(SeekMode::SeekCur, 1);
+        assert_eq!(is.position.offset, 4);
+
+        is.seek(SeekMode::SeekCur, -2);
+        assert_eq!(is.position.offset, 2);
+
+        is.seek(SeekMode::SeekCur, 10);
+        assert_eq!(is.position.offset, 5);
+
+        is.seek(SeekMode::SeekSet, 100);
+        assert_eq!(is.position.offset, 5);
+
+        is.seek(SeekMode::SeekSet, -100);
+        assert_eq!(is.position.offset, 0);
+
+        is.seek(SeekMode::SeekEnd, -100);
+        assert_eq!(is.position.offset, 0);
+    }
+
+    #[test]
+    fn test_eof() {
+        let mut is = InputStream::new();
+        is.read_from_str("abc", Some(Encoding::UTF8));
+        assert_eq!(is.length, 3);
+        assert_eq!(is.chars_left(), 3);
+        assert_eq!(is.read_char().utf8(), 'a');
+        assert_eq!(is.read_char().utf8(), 'b');
+        assert_eq!(is.read_char().utf8(), 'c');
+        assert_eq!(is.read_char().is_eof(), true);
+        assert_eq!(is.read_char().is_eof(), true);
+        assert_eq!(is.read_char().is_eof(), true);
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().is_eof(), false);
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().is_eof(), false);
+        is.unread();
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'a');
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'a');
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'a');
+        is.unread();
+        is.unread();
+        is.unread();
+        is.unread();
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'a');
+        assert_eq!(is.read_char().utf8(), 'b');
+        assert_eq!(is.read_char().utf8(), 'c');
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        is.unread();
+        assert_eq!(is.read_char().utf8(), 'c');
+        assert_eq!(is.read_char().is_eof(), true);
+        is.unread();
+        assert_eq!(is.read_char().is_eof(), true);
     }
 }
diff --git a/src/html5_parser/mod.rs b/src/html5_parser/mod.rs
index 6542c7feb..85f9e9466 100644
--- a/src/html5_parser/mod.rs
+++ b/src/html5_parser/mod.rs
@@ -1,41 +1,13 @@
 pub mod input_stream;
 
+pub mod parser;
+pub mod tokenizer;
+pub mod token;
+pub mod token_states;
+pub mod parse_errors;
+
 mod consume_char_refs;
 mod emitter;
 mod node;
-mod token;
 mod token_named_characters;
-mod token_replacements;
-mod token_states;
-mod tokenizer;
-
-use input_stream::InputStream;
-use node::Node;
-use tokenizer::Tokenizer;
-
-pub struct Html5Parser<'a> {
-    tokenizer: Tokenizer<'a>,
-}
-
-impl<'a> Html5Parser<'a> {
-    // Creates a new parser object with the given input stream
-    pub fn new(stream: &'a mut InputStream) -> Self {
-        Html5Parser {
-            tokenizer: Tokenizer::new(stream),
-        }
-    }
-
-    // Parses the input stream into a Node tree
-    pub fn parse(&mut self) -> Node {
-        // Tokenize stuff
-
-        for _ in 1..=20 {
-            let t = self.tokenizer.next_token();
-            println!("{}", t.to_string());
-        }
-
-        let mut n = Node::new("root");
-        n.add_child(Node::new("child"));
-        return n;
-    }
-}
+mod token_replacements;
\ No newline at end of file
diff --git a/src/html5_parser/parse_errors.rs b/src/html5_parser/parse_errors.rs
new file mode 100755
index 000000000..aaca71adc
--- /dev/null
+++ b/src/html5_parser/parse_errors.rs
@@ -0,0 +1,107 @@
+pub enum ParserError {
+    AbruptDoctypePublicIdentifier,
+    AbruptDoctypeSystemIdentifier,
+    AbruptClosingOfEmptyComment,
+    AbsenceOfDigitsInNumericCharacterReference,
+    CdataInHtmlContent,
+    CharacterReferenceOutsideUnicodeRange,
+    ControlCharacterInInputStream,
+    ControlCharacterReference,
+    EndTagWithAttributes,
+    DuplicateAttribute,
+    EndTagWithTrailingSolidus,
+    EofBeforeTagName,
+    EofInCdata,
+    EofInComment,
+    EofInDoctype,
+    EofInScriptHtmlCommentLikeText,
+    EofInTag,
+    IncorrectlyClosedComment,
+    IncorrectlyOpenedComment,
+    InvalidCharacterSequenceAfterDoctypeName,
+    InvalidFirstCharacterOfTagName,
+    MissingAttributeValue,
+    MissingDoctypeName,
+    MissingDoctypePublicIdentifier,
+    MissingDoctypeSystemIdentifier,
+    MissingEndTagName,
+    MissingQuoteBeforeDoctypePublicIdentifier,
+    MissingQuoteBeforeDoctypeSystemIdentifier,
+    MissingSemicolonAfterCharacterReference,
+    MissingWhitespaceAfterDoctypePublicKeyword,
+    MissingWhitespaceAfterDoctypeSystemKeyword,
+    MissingWhitespaceBeforeDoctypeName,
+    MissingWhitespaceBetweenAttributes,
+    MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers,
+    NestedComment,
+    NoncharacterCharacterReference,
+    NoncharacterInInputStream,
+    NonVoidHtmlElementStartTagWithTrailingSolidus,
+    NullCharacterReference,
+    SurrogateCharacterReference,
+    SurrogateInInputStream,
+    UnexpectedCharacterAfterDoctypeSystemIdentifier,
+    UnexpectedCharacterInAttributeName,
+    UnexpectedCharacterInUnquotedAttributeValue,
+    UnexpectedEqualsSignBeforeAttributeName,
+    UnexpectedNullCharacter,
+    UnexpectedQuestionMarkInsteadOfTagName,
+    UnexpectedSolidusInTag,
+    UnknownNamedCharacterReference,
+}
+
+impl ParserError {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            ParserError::AbruptDoctypePublicIdentifier => "abrupt-doctype-public-identifier",
+            ParserError::AbruptDoctypeSystemIdentifier => "abrupt-doctype-system-identifier",
+            ParserError::AbsenceOfDigitsInNumericCharacterReference => "absence-of-digits-in-numeric-character-reference",
+            ParserError::CdataInHtmlContent => "cdata-in-html-content",
+            ParserError::CharacterReferenceOutsideUnicodeRange => "character-reference-outside-unicode-range",
+            ParserError::ControlCharacterInInputStream => "control-character-in-input-stream",
+            ParserError::ControlCharacterReference => "control-character-reference",
+            ParserError::EndTagWithAttributes => "end-tag-with-attributes",
+            ParserError::DuplicateAttribute => "duplicate-attribute",
+            ParserError::EndTagWithTrailingSolidus => "end-tag-with-trailing-solidus",
+            ParserError::EofBeforeTagName => "eof-before-tag-name",
+            ParserError::EofInCdata => "eof-in-cdata",
+            ParserError::EofInComment => "eof-in-comment",
+            ParserError::EofInDoctype => "eof-in-doctype",
+            ParserError::EofInScriptHtmlCommentLikeText => "eof-in-script-html-comment-like-text",
+            ParserError::EofInTag => "eof-in-tag",
+            ParserError::IncorrectlyClosedComment => "incorrectly-closed-comment",
+            ParserError::IncorrectlyOpenedComment => "incorrectly-opened-comment",
+            ParserError::InvalidCharacterSequenceAfterDoctypeName => "invalid-character-sequence-after-doctype-name",
+            ParserError::InvalidFirstCharacterOfTagName => "invalid-first-character-of-tag-name",
+            ParserError::MissingAttributeValue => "missing-attribute-value",
+            ParserError::MissingDoctypeName => "missing-doctype-name",
+            ParserError::MissingDoctypePublicIdentifier => "missing-doctype-public-identifier",
+            ParserError::MissingDoctypeSystemIdentifier => "missing-doctype-system-identifier",
+            ParserError::MissingEndTagName => "missing-end-tag-name",
+            ParserError::MissingQuoteBeforeDoctypePublicIdentifier => "missing-quote-before-doctype-public-identifier",
+            ParserError::MissingQuoteBeforeDoctypeSystemIdentifier => "missing-quote-before-doctype-system-identifier",
+            ParserError::MissingSemicolonAfterCharacterReference => "missing-semicolon-after-character-reference",
+            ParserError::MissingWhitespaceAfterDoctypePublicKeyword => "missing-whitespace-after-doctype-public-keyword",
+            ParserError::MissingWhitespaceAfterDoctypeSystemKeyword => "missing-whitespace-after-doctype-system-keyword",
+            ParserError::MissingWhitespaceBeforeDoctypeName => "missing-whitespace-before-doctype-name",
+            ParserError::MissingWhitespaceBetweenAttributes => "missing-whitespace-between-attributes",
+            ParserError::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers => "missing-whitespace-between-doctype-public-and-system-identifiers",
+            ParserError::NestedComment => "nested-comment",
+            ParserError::NoncharacterCharacterReference => "noncharacter-character-reference",
+            ParserError::NoncharacterInInputStream => "noncharacter-in-input-stream",
+            ParserError::NonVoidHtmlElementStartTagWithTrailingSolidus => "non-void-html-element-start-tag-with-trailing-solidus",
+            ParserError::NullCharacterReference => "null-character-reference",
+            ParserError::SurrogateCharacterReference => "surrogate-character-reference",
+            ParserError::SurrogateInInputStream => "surrogate-in-input-stream",
+            ParserError::UnexpectedCharacterAfterDoctypeSystemIdentifier => "unexpected-character-after-doctype-system-identifier",
+            ParserError::UnexpectedCharacterInAttributeName => "unexpected-character-in-attribute-name",
+            ParserError::UnexpectedCharacterInUnquotedAttributeValue => "unexpected-character-in-unquoted-attribute-value",
+            ParserError::UnexpectedEqualsSignBeforeAttributeName => "unexpected-equals-sign-before-attribute-name",
+            ParserError::UnexpectedNullCharacter => "unexpected-null-character",
+            ParserError::UnexpectedQuestionMarkInsteadOfTagName => "unexpected-question-mark-instead-of-tag-name",
+            ParserError::UnexpectedSolidusInTag => "unexpected-solidus-in-tag",
+            ParserError::UnknownNamedCharacterReference => "unknown-named-character-reference",
+            ParserError::AbruptClosingOfEmptyComment => "abrupt-closing-of-empty-comment",
+        }
+    }
+}
diff --git a/src/html5_parser/parser.rs b/src/html5_parser/parser.rs
new file mode 100755
index 000000000..31c899cf7
--- /dev/null
+++ b/src/html5_parser/parser.rs
@@ -0,0 +1,30 @@
+use crate::html5_parser::input_stream::InputStream;
+use crate::html5_parser::node::Node;
+use crate::html5_parser::tokenizer::Tokenizer;
+
+pub struct Html5Parser<'a> {
+    tokenizer: Tokenizer<'a>,
+}
+
+impl<'a> Html5Parser<'a> {
+    // Creates a new parser object with the given input stream
+    pub fn new(stream: &'a mut InputStream) -> Self {
+        Html5Parser {
+            tokenizer: Tokenizer::new(stream, None),
+        }
+    }
+
+    // Parses the input stream into a Node tree
+    pub fn parse(&mut self) -> Node {
+        // Tokenize stuff
+
+        for _ in 1..=20 {
+            let t = self.tokenizer.next_token();
+            println!("{}", t.to_string());
+        }
+
+        let mut n = Node::new("root");
+        n.add_child(Node::new("child"));
+        return n;
+    }
+}
diff --git a/src/html5_parser/test_results.md b/src/html5_parser/test_results.md
new file mode 100755
index 000000000..ab950d32d
--- /dev/null
+++ b/src/html5_parser/test_results.md
@@ -0,0 +1,13 @@
+Almost all token tests (found in html5lib-tests/tokenizer) will pass:
+
+🏁 Tests completed: Ran 6805 tests, 2770 assertions, 2748 succeeded, 22 failed (18 position failures)
+
+The failing tests are due to the fact that Rust does not allow surrogate code points (0xD800-0xDFFF) in `char` values.
+These values cannot exist on their own in a valid UTF-8 string.
+
+For instance: 
+
+`<!DOCTYPE a PUBLIC'\uDBC0\uDC00`
+
+This test has a non-BMP character that is internally seen as a single character but, from the perspective
+of the test, is seen as 2 characters (hi/lo surrogate). This means that the end-of-file is off by 1 position.
\ No newline at end of file
diff --git a/src/html5_parser/token.rs b/src/html5_parser/token.rs
index 84849cd79..92c1a1486 100755
--- a/src/html5_parser/token.rs
+++ b/src/html5_parser/token.rs
@@ -9,9 +9,11 @@ pub enum TokenType {
     EofToken,
 }
 
+// The different token structures that can be emitted by the tokenizer
+#[derive(Clone, PartialEq)]
 pub enum Token {
     DocTypeToken {
-        name: String,
+        name: Option<String>,
         force_quirks: bool,
         pub_identifier: Option<String>,
         sys_identifier: Option<String>,
@@ -33,6 +35,7 @@ pub enum Token {
     EofToken,
 }
 
+// Each token can be displayed as a string
 impl std::fmt::Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         match self {
@@ -42,20 +45,20 @@ impl std::fmt::Display for Token {
                 pub_identifier,
                 sys_identifier,
             } => {
-                let mut result = format!("<{} ", name);
+                let mut result = format!("<!DOCTYPE {:?}", name);
                 if *force_quirks {
                     result.push_str(" FORCE_QUIRKS!");
                 }
                 if let Some(pub_id) = pub_identifier {
-                    result.push_str(&format!(" {} ", pub_id));
+                    result.push_str(&format!(" {}", pub_id));
                 }
                 if let Some(sys_id) = sys_identifier {
-                    result.push_str(&format!(" {} ", sys_id));
+                    result.push_str(&format!(" {}", sys_id));
                 }
-                result.push('>');
+                result.push_str(" />");
                 write!(f, "{}", result)
             }
-            Token::CommentToken { value } => write!(f, "<!--{}-->", value),
+            Token::CommentToken { value } => write!(f, "<!-- {} -->", value),
             Token::TextToken { value } => write!(f, "{}", value),
             Token::StartTagToken {
                 name,
@@ -83,6 +86,7 @@ pub trait TokenTrait {
     fn type_of(&self) -> TokenType;
 }
 
+// Each token implements the TokenTrait and has a type_of that will return the tokentype.
 impl TokenTrait for Token {
     fn type_of(&self) -> TokenType {
         match self {
diff --git a/src/html5_parser/token_named_characters.rs b/src/html5_parser/token_named_characters.rs
index 335c2287d..61d6123ba 100644
--- a/src/html5_parser/token_named_characters.rs
+++ b/src/html5_parser/token_named_characters.rs
@@ -2143,97 +2143,97 @@ pub static TOKEN_NAMED_CHARS: phf::Map<&'static str, &'static str> = phf_map! {
     "yen" => "\u{00A5}",
     "yuml" => "\u{00FF}",
 
-    "nLt;" =>"\u{226A}{20D2}",
-    "nGt;" =>"\u{226B}{20D2}",
-    "NotEqualTilde;" =>"\u{2242}{0338}",
-    "NotGreaterFullEqual;" =>"\u{2267}{0338}",
-    "NotGreaterGreater;" =>"\u{226B}{0338}",
-    "NotGreaterSlantEqual;" =>"\u{2A7E}{0338}",
-    "NotHumpDownHump;" =>"\u{224E}{0338}",
-    "NotHumpEqual;" =>"\u{224F}{0338}",
-    "NotLeftTriangleBar;" =>"\u{29CF}{0338}",
-    "NotLessLess;" =>"\u{226A}{0338}",
-    "NotLessSlantEqual;" =>"\u{2A7D}{0338}",
-    "NotNestedGreaterGreater;" =>"\u{2AA2}{0338}",
-    "NotNestedLessLess;" =>"\u{2AA1}{0338}",
-    "NotPrecedesEqual;" =>"\u{2AAF}{0338}",
-    "NotRightTriangleBar;" =>"\u{29D0}{0338}",
-    "NotSquareSubset;" =>"\u{228F}{0338}",
-    "NotSquareSuperset;" =>"\u{2290}{0338}",
-    "NotSubset;" =>"\u{2282}{20D2}",
-    "NotSucceedsEqual;" =>"\u{2AB0}{0338}",
-    "NotSucceedsTilde;" =>"\u{227F}{0338}",
-    "NotSuperset;" =>"\u{2283}{20D2}",
-    "ThickSpace;" =>"\u{205F}{200A}",
-    "acE;" =>"\u{223E}{0333}",
-    "bne;" =>"\u{003D}{20E5}",
-    "bnequiv;" =>"\u{2261}{20E5}",
-    "caps;" =>"\u{2229}{FE00}",
-    "cups;" =>"\u{222A}{FE00}",
-    "fjlig;" =>"\u{0066}{006A}",
-    "gesl;" =>"\u{22DB}{FE00}",
-    "gvertneqq;" =>"\u{2269}{FE00}",
-    "gvnE;" =>"\u{2269}{FE00}",
-    "lates;" =>"\u{2AAD}{FE00}",
-    "lesg;" =>"\u{22DA}{FE00}",
-    "lvertneqq;" =>"\u{2268}{FE00}",
-    "lvnE;" =>"\u{2268}{FE00}",
-    "nGg;" =>"\u{22D9}{0338}",
-    "nGtv;" =>"\u{226B}{0338}",
-    "nLl;" =>"\u{22D8}{0338}",
-    "nLtv;" =>"\u{226A}{0338}",
-    "nang;" =>"\u{2220}{20D2}",
-    "napE;" =>"\u{2A70}{0338}",
-    "napid;" =>"\u{224B}{0338}",
-    "nbump;" =>"\u{224E}{0338}",
-    "nbumpe;" =>"\u{224F}{0338}",
-    "ncongdot;" =>"\u{2A6D}{0338}",
-    "nedot;" =>"\u{2250}{0338}",
-    "nesim;" =>"\u{2242}{0338}",
-    "ngE;" =>"\u{2267}{0338}",
-    "ngeqq;" =>"\u{2267}{0338}",
-    "ngeqslant;" =>"\u{2A7E}{0338}",
-    "nges;" =>"\u{2A7E}{0338}",
-    "nlE;" =>"\u{2266}{0338}",
-    "nleqq;" =>"\u{2266}{0338}",
-    "nleqslant;" =>"\u{2A7D}{0338}",
-    "nles;" =>"\u{2A7D}{0338}",
-    "notinE;" =>"\u{22F9}{0338}",
-    "notindot;" =>"\u{22F5}{0338}",
-    "nparsl;" =>"\u{2AFD}{20E5}",
-    "npart;" =>"\u{2202}{0338}",
-    "npre;" =>"\u{2AAF}{0338}",
-    "npreceq;" =>"\u{2AAF}{0338}",
-    "nrarrc;" =>"\u{2933}{0338}",
-    "nrarrw;" =>"\u{219D}{0338}",
-    "nsce;" =>"\u{2AB0}{0338}",
-    "nsubE;" =>"\u{2AC5}{0338}",
-    "nsubset;" =>"\u{2282}{20D2}",
-    "nsubseteqq;" =>"\u{2AC5}{0338}",
-    "nsucceq;" =>"\u{2AB0}{0338}",
-    "nsupE;" =>"\u{2AC6}{0338}",
-    "nsupset;" =>"\u{2283}{20D2}",
-    "nsupseteqq;" =>"\u{2AC6}{0338}",
-    "nvap;" =>"\u{224D}{20D2}",
-    "nvge;" =>"\u{2265}{20D2}",
-    "nvgt;" =>"\u{003E}{20D2}",
-    "nvle;" =>"\u{2264}{20D2}",
-    "nvlt;" =>"\u{003C}{20D2}",
-    "nvltrie;" =>"\u{22B4}{20D2}",
-    "nvrtrie;" =>"\u{22B5}{20D2}",
-    "nvsim;" =>"\u{223C}{20D2}",
-    "race;" =>"\u{223D}{0331}",
-    "smtes;" =>"\u{2AAC}{FE00}",
-    "sqcaps;" =>"\u{2293}{FE00}",
-    "sqcups;" =>"\u{2294}{FE00}",
-    "varsubsetneq;" =>"\u{228A}{FE00}",
-    "varsubsetneqq;" =>"\u{2ACB}{FE00}",
-    "varsupsetneq;" =>"\u{228B}{FE00}",
-    "varsupsetneqq;" =>"\u{2ACC}{FE00}",
-    "vnsub;" =>"\u{2282}{20D2}",
-    "vnsup;" =>"\u{2283}{20D2}",
-    "vsubnE;" =>"\u{2ACB}{FE00}",
-    "vsubne;" =>"\u{228A}{FE00}",
-    "vsupnE;" =>"\u{2ACC}{FE00}",
-    "vsupne;" =>"\u{228B}{FE00}",
+    "nLt;" =>"\u{226A}\u{20D2}",
+    "nGt;" =>"\u{226B}\u{20D2}",
+    "NotEqualTilde;" =>"\u{2242}\u{0338}",
+    "NotGreaterFullEqual;" =>"\u{2267}\u{0338}",
+    "NotGreaterGreater;" =>"\u{226B}\u{0338}",
+    "NotGreaterSlantEqual;" =>"\u{2A7E}\u{0338}",
+    "NotHumpDownHump;" =>"\u{224E}\u{0338}",
+    "NotHumpEqual;" =>"\u{224F}\u{0338}",
+    "NotLeftTriangleBar;" =>"\u{29CF}\u{0338}",
+    "NotLessLess;" =>"\u{226A}\u{0338}",
+    "NotLessSlantEqual;" =>"\u{2A7D}\u{0338}",
+    "NotNestedGreaterGreater;" =>"\u{2AA2}\u{0338}",
+    "NotNestedLessLess;" =>"\u{2AA1}\u{0338}",
+    "NotPrecedesEqual;" =>"\u{2AAF}\u{0338}",
+    "NotRightTriangleBar;" =>"\u{29D0}\u{0338}",
+    "NotSquareSubset;" =>"\u{228F}\u{0338}",
+    "NotSquareSuperset;" =>"\u{2290}\u{0338}",
+    "NotSubset;" =>"\u{2282}\u{20D2}",
+    "NotSucceedsEqual;" =>"\u{2AB0}\u{0338}",
+    "NotSucceedsTilde;" =>"\u{227F}\u{0338}",
+    "NotSuperset;" =>"\u{2283}\u{20D2}",
+    "ThickSpace;" =>"\u{205F}\u{200A}",
+    "acE;" =>"\u{223E}\u{0333}",
+    "bne;" =>"\u{003D}\u{20E5}",
+    "bnequiv;" =>"\u{2261}\u{20E5}",
+    "caps;" =>"\u{2229}\u{FE00}",
+    "cups;" =>"\u{222A}\u{FE00}",
+    "fjlig;" =>"\u{0066}\u{006A}",
+    "gesl;" =>"\u{22DB}\u{FE00}",
+    "gvertneqq;" =>"\u{2269}\u{FE00}",
+    "gvnE;" =>"\u{2269}\u{FE00}",
+    "lates;" =>"\u{2AAD}\u{FE00}",
+    "lesg;" =>"\u{22DA}\u{FE00}",
+    "lvertneqq;" =>"\u{2268}\u{FE00}",
+    "lvnE;" =>"\u{2268}\u{FE00}",
+    "nGg;" =>"\u{22D9}\u{0338}",
+    "nGtv;" =>"\u{226B}\u{0338}",
+    "nLl;" =>"\u{22D8}\u{0338}",
+    "nLtv;" =>"\u{226A}\u{0338}",
+    "nang;" =>"\u{2220}\u{20D2}",
+    "napE;" =>"\u{2A70}\u{0338}",
+    "napid;" =>"\u{224B}\u{0338}",
+    "nbump;" =>"\u{224E}\u{0338}",
+    "nbumpe;" =>"\u{224F}\u{0338}",
+    "ncongdot;" =>"\u{2A6D}\u{0338}",
+    "nedot;" =>"\u{2250}\u{0338}",
+    "nesim;" =>"\u{2242}\u{0338}",
+    "ngE;" =>"\u{2267}\u{0338}",
+    "ngeqq;" =>"\u{2267}\u{0338}",
+    "ngeqslant;" =>"\u{2A7E}\u{0338}",
+    "nges;" =>"\u{2A7E}\u{0338}",
+    "nlE;" =>"\u{2266}\u{0338}",
+    "nleqq;" =>"\u{2266}\u{0338}",
+    "nleqslant;" =>"\u{2A7D}\u{0338}",
+    "nles;" =>"\u{2A7D}\u{0338}",
+    "notinE;" =>"\u{22F9}\u{0338}",
+    "notindot;" =>"\u{22F5}\u{0338}",
+    "nparsl;" =>"\u{2AFD}\u{20E5}",
+    "npart;" =>"\u{2202}\u{0338}",
+    "npre;" =>"\u{2AAF}\u{0338}",
+    "npreceq;" =>"\u{2AAF}\u{0338}",
+    "nrarrc;" =>"\u{2933}\u{0338}",
+    "nrarrw;" =>"\u{219D}\u{0338}",
+    "nsce;" =>"\u{2AB0}\u{0338}",
+    "nsubE;" =>"\u{2AC5}\u{0338}",
+    "nsubset;" =>"\u{2282}\u{20D2}",
+    "nsubseteqq;" =>"\u{2AC5}\u{0338}",
+    "nsucceq;" =>"\u{2AB0}\u{0338}",
+    "nsupE;" =>"\u{2AC6}\u{0338}",
+    "nsupset;" =>"\u{2283}\u{20D2}",
+    "nsupseteqq;" =>"\u{2AC6}\u{0338}",
+    "nvap;" =>"\u{224D}\u{20D2}",
+    "nvge;" =>"\u{2265}\u{20D2}",
+    "nvgt;" =>"\u{003E}\u{20D2}",
+    "nvle;" =>"\u{2264}\u{20D2}",
+    "nvlt;" =>"\u{003C}\u{20D2}",
+    "nvltrie;" =>"\u{22B4}\u{20D2}",
+    "nvrtrie;" =>"\u{22B5}\u{20D2}",
+    "nvsim;" =>"\u{223C}\u{20D2}",
+    "race;" =>"\u{223D}\u{0331}",
+    "smtes;" =>"\u{2AAC}\u{FE00}",
+    "sqcaps;" =>"\u{2293}\u{FE00}",
+    "sqcups;" =>"\u{2294}\u{FE00}",
+    "varsubsetneq;" =>"\u{228A}\u{FE00}",
+    "varsubsetneqq;" =>"\u{2ACB}\u{FE00}",
+    "varsupsetneq;" =>"\u{228B}\u{FE00}",
+    "varsupsetneqq;" =>"\u{2ACC}\u{FE00}",
+    "vnsub;" =>"\u{2282}\u{20D2}",
+    "vnsup;" =>"\u{2283}\u{20D2}",
+    "vsubnE;" =>"\u{2ACB}\u{FE00}",
+    "vsubne;" =>"\u{228A}\u{FE00}",
+    "vsupnE;" =>"\u{2ACC}\u{FE00}",
+    "vsupne;" =>"\u{228B}\u{FE00}",
 };
diff --git a/src/html5_parser/token_replacements.rs b/src/html5_parser/token_replacements.rs
index 3c5575fa3..80a03567b 100644
--- a/src/html5_parser/token_replacements.rs
+++ b/src/html5_parser/token_replacements.rs
@@ -2,8 +2,8 @@
 // https://dev.w3.org/html5/spec-LC/tokenization.html#consume-a-character-reference
 // If a character (#0x80; for instance) is found, it must be replaced by the given character
 pub static TOKEN_REPLACEMENTS: phf::Map<u32, char> = phf::phf_map! {
-    0x00_u32 => '\u{FFFD}',
-    0x0d_u32 => '\u{000D}',
+    // 0x00_u32 => '\u{FFFD}',
+    // 0x0d_u32 => '\u{000D}',
     0x80_u32 => '\u{20AC}',
     0x81_u32 => '\u{0081}',
     0x82_u32 => '\u{201A}',
diff --git a/src/html5_parser/token_states.rs b/src/html5_parser/token_states.rs
index 7b31e685c..4e5e81304 100644
--- a/src/html5_parser/token_states.rs
+++ b/src/html5_parser/token_states.rs
@@ -1,5 +1,5 @@
 // These are the states in which the tokenizer can be in.
-#[derive(Debug)]
+#[derive(Debug, Copy, Clone)]
 pub enum State {
     DataState,
     CharacterReferenceInDataState,
@@ -24,6 +24,7 @@ pub enum State {
     ScriptDataEscapeStartDashState,
     ScriptDataEscapedState,
     ScriptDataEscapedDashState,
+    ScriptDataEscapedDashDashState,
     ScriptDataEscapedLessThanSignState,
     ScriptDataEscapedEndTagOpenState,
     ScriptDataEscapedEndTagNameState,
@@ -35,6 +36,7 @@ pub enum State {
     ScriptDataDoubleEscapeEndState,
     BeforeAttributeNameState,
     AttributeNameState,
+    AfterAttributeNameState,
     BeforeAttributeValueState,
     AttributeValueDoubleQuotedState,
     AttributeValueSingleQuotedState,
@@ -47,6 +49,10 @@ pub enum State {
     CommentStartState,
     CommentStartDashState,
     CommentState,
+    CommentLessThanSignState,
+    CommentLessThanSignBangState,
+    CommentLessThanSignBangDashState,
+    CommentLessThanSignBangDashDashState,
     CommentEndDashState,
     CommentEndState,
     CommentEndBangState,
@@ -61,10 +67,12 @@ pub enum State {
     AfterDoctypePublicIdentifierState,
     BetweenDocTypePublicAndSystemIdentifiersState,
     AfterDocTypeSystemKeywordState,
-    BeforeDocTypeSystemIdentifiedState,
+    BeforeDocTypeSystemIdentifierState,
     DocTypeSystemIdentifierDoubleQuotedState,
     DocTypeSystemIdentifierSingleQuotedState,
-    AfterDocTypeSystemIdentifiedState,
+    AfterDocTypeSystemIdentifierState,
     BogusDocTypeState,
     CDataSectionState,
+    CDataSectionBracketState,
+    CDataSectionEndState,
 }
diff --git a/src/html5_parser/tokenizer.rs b/src/html5_parser/tokenizer.rs
index 4eab8b94c..4113775e9 100644
--- a/src/html5_parser/tokenizer.rs
+++ b/src/html5_parser/tokenizer.rs
@@ -1,136 +1,2160 @@
 use crate::html5_parser::input_stream::InputStream;
+use crate::html5_parser::input_stream::Element;
+use crate::html5_parser::input_stream::SeekMode::SeekCur;
+use crate::html5_parser::parse_errors::ParserError;
 use crate::html5_parser::token::Token;
 use crate::html5_parser::token_states::State;
 
 // Constants that are not directly captured as visible chars
+pub const CHAR_NUL: char = '\u{0000}';
 pub const CHAR_TAB: char = '\u{0009}';
 pub const CHAR_LF: char = '\u{000A}';
+pub const CHAR_CR: char = '\u{000D}';
 pub const CHAR_FF: char = '\u{000C}';
 pub const CHAR_SPACE: char = '\u{0020}';
 pub const CHAR_REPLACEMENT: char = '\u{FFFD}';
 
-// Errors produced by the tokenizer
-#[derive(Debug)]
-pub enum Error {
-    NullEncountered,
-}
-
 // The tokenizer will read the input stream and emit tokens that can be used by the parser.
 pub struct Tokenizer<'a> {
-    pub stream: &'a mut InputStream, // HTML character input stream
-    pub state: State,                // Current state of the tokenizer
-    pub consumed: Vec<char>,         // Current consumed characters for current token
-                                     // pub emitter: &'a mut dyn Emitter,   // Emitter trait that will emit the tokens during parsing
+    pub stream: &'a mut InputStream,    // HTML character input stream
+    pub state: State,                   // Current state of the tokenizer
+    pub consumed: Vec<char>,            // Current consumed characters for current token
+    pub current_attr_name: String,      // Current attribute name that we need to store temporarily while we are parsing attributes
+    pub current_attr_value: String,     // Current attribute value that we need to store temporarily while we are parsing attributes
+    pub current_attrs: Vec<(String, String)>,  // Current attributes
+    pub current_token: Option<Token>,   // Token that is currently in the making (if any)
+    pub temporary_buffer: Vec<char>,    // Temporary buffer
+    pub token_queue: Vec<Token>,        // Queue of emitted tokens. Needed because we can generate multiple tokens during iteration
+    pub errors: Vec<ParseError>,        // Parse errors (if any)
+    pub last_start_token: String,       // The last emitted start token (or empty if none)
+}
+
+pub struct Options {
+    pub initial_state: State,           // Sets the initial state of the tokenizer. Normally only needed when dealing with tests
+    pub last_start_tag: String,         // Sets the last starting tag in the tokenizer. Normally only needed when dealing with tests
+}
+
+#[macro_export]
+macro_rules! read_char {
+    ($self:expr) => {
+        {
+            let mut c = $self.stream.read_char();
+            match c {
+                Element::Surrogate(..) => {
+                    $self.parse_error(ParserError::SurrogateInInputStream);
+                    c = Element::Utf8(CHAR_REPLACEMENT);
+                }
+                Element::Utf8(c) if $self.is_control_char(c as u32) => {
+                    $self.parse_error(ParserError::ControlCharacterInInputStream);
+                }
+                Element::Utf8(c) if $self.is_noncharacter(c as u32) => {
+                    $self.parse_error(ParserError::NoncharacterInInputStream);
+                }
+                _ => {}
+            }
+
+            c
+        }
+    }
+}
+
+// Adds the given character to the current token's value (if applicable)
+macro_rules! add_to_token_value {
+    ($self:expr, $c:expr) => {
+        match &mut $self.current_token {
+            Some(Token::CommentToken {value, ..}) => {
+                value.push($c);
+            }
+            _ => {},
+        }
+    }
+}
+
+macro_rules! set_public_identifier {
+    ($self:expr, $str:expr) => {
+        match &mut $self.current_token {
+            Some(Token::DocTypeToken { pub_identifier, ..}) => {
+                *pub_identifier = Some($str);
+            }
+            _ => {},
+        }
+    }
+}
+macro_rules! add_public_identifier {
+    ($self:expr, $c:expr) => {
+        match &mut $self.current_token {
+            Some(Token::DocTypeToken { pub_identifier, ..}) => {
+                if let Some(pid) = pub_identifier {
+                    pid.push($c);
+                }
+            }
+            _ => {},
+        }
+    }
+}
+
+macro_rules! set_system_identifier {
+    ($self:expr, $str:expr) => {
+        match &mut $self.current_token {
+            Some(Token::DocTypeToken { sys_identifier, ..}) => {
+                *sys_identifier = Some($str);
+            }
+            _ => {},
+        }
+    }
+}
+macro_rules! add_system_identifier {
+    ($self:expr, $c:expr) => {
+        match &mut $self.current_token {
+            Some(Token::DocTypeToken { sys_identifier, ..}) => {
+                if let Some(sid) = sys_identifier {
+                    sid.push($c);
+                }
+            }
+            _ => {},
+        }
+    }
+}
+
+// Adds the given character to the current token's name (if applicable)
+macro_rules! add_to_token_name {
+    ($self:expr, $c:expr) => {
+        match &mut $self.current_token {
+            Some(Token::StartTagToken {name, ..}) => {
+                name.push($c);
+            }
+            Some(Token::EndTagToken {name, ..}) => {
+                name.push($c);
+            }
+            Some(Token::DocTypeToken {name, ..}) => {
+                // Doctype can have an optional name
+                match name {
+                    Some(ref mut string) => string.push($c),
+                    None => *name = Some($c.to_string()),
+                }
+            }
+            _ => {},
+        }
+    }
+}
+
+// Convert a character to lower case value (assumes character is in A-Z range)
+macro_rules! to_lowercase {
+    // Converts A-Z to a-z
+    ($c:expr) => {
+        ((($c) as u8) + 0x20) as char
+    };
+}
+
+// Emits the current stored token
+macro_rules! emit_current_token {
+    ($self:expr) => {
+        match $self.current_token {
+            None => {},
+            _ => {
+                emit_token!($self, $self.current_token.as_ref().unwrap());
+            }
+        };
+        $self.current_token = None;
+    };
+}
+
+// Emits the given token. The token does not have to be stored in current_token first.
+macro_rules! emit_token {
+    ($self:expr, $token:expr) => {
+        // Save the start token name if we are pushing it. This helps us in detecting matching tags.
+        match $token {
+            Token::StartTagToken { name, .. } => {
+                $self.last_start_token = String::from(name);
+            },
+            _ => {}
+        }
+
+        // match $token {
+        //     Token::EndTagToken { .. } => {
+        //         if !$self.current_attrs.is_empty() {
+        //             $self.parse_error(ParserError::EndTagWithAttributes);
+        //         }
+        //     }
+        //     _ => {}
+        // }
+
+        // If there is any consumed data, emit this first as a text token
+        if $self.has_consumed_data() {
+            $self.token_queue.push(Token::TextToken{
+                value: $self.get_consumed_str(),
+            });
+            $self.clear_consume_buffer();
+        }
+
+        $self.token_queue.push($token.clone());
+    }
+}
+
+// Parse error that records an error message together with the position where it occurred
+#[derive(PartialEq)]
+pub struct ParseError {
+    pub message: String,  // Parse message
+    pub line: usize,        // Line number of the error
+    pub col: usize,         // Offset on line of the error
+    pub offset: usize,      // Offset of the error (NOTE(review): presumably from the start of the input rather than the line; confirm)
 }
 
 impl<'a> Tokenizer<'a> {
-    pub fn new(input: &'a mut InputStream /*, emitter: &'a mut dyn Emitter*/) -> Self {
+    // Creates a new tokenizer for the given input stream, with optional additional options
+    pub fn new(input: &'a mut InputStream /*, emitter: &'a mut dyn Emitter*/, opts: Option<Options>) -> Self {
         return Tokenizer {
             stream: input,
-            state: State::DataState,
+            state: opts.as_ref().map_or(State::DataState, |o| o.initial_state),
+            last_start_token: opts.as_ref().map_or(String::new(), |o| o.last_start_tag.clone()),
             consumed: vec![],
-            // emitter,
+            current_token: None,
+            token_queue: vec![],
+            current_attr_name: String::new(),
+            current_attr_value: String::new(),
+            current_attrs: vec![],
+            temporary_buffer: vec![],
+            errors: vec![],
         };
     }
 
     // Retrieves the next token from the input stream or Token::EOF when the end is reached
-    pub(crate) fn next_token(&mut self) -> Token {
+    pub fn next_token(&mut self) -> Token {
+        self.consume_stream();
+
+        if self.token_queue.len() == 0 {
+            return Token::EofToken{};
+        }
+
+        return self.token_queue.remove(0);
+    }
+
+    // Consumes the input stream. Continues until the stream is completed or a token has been generated.
+    fn consume_stream(&mut self) {
         loop {
-            println!("state: {:?}", self.state);
-            println!("consumed: {:?}", self.consumed);
+            // Something is already in the token buffer, so we can return it.
+            if self.token_queue.len() > 0 {
+                return
+            }
 
             match self.state {
                 State::DataState => {
-                    let c = match self.stream.read_char() {
-                        Some(c) => c,
-                        None => {
-                            self.parse_error("EOF");
-                            return Token::EofToken;
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('&') => self.state = State::CharacterReferenceInDataState,
+                        Element::Utf8('<') => self.state = State::TagOpenState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.consume(c.utf8());
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                        },
+                        Element::Eof => {
+                            // EOF
+                            if self.has_consumed_data() {
+                                emit_token!(self, Token::TextToken { value: self.get_consumed_str() });
+                                self.clear_consume_buffer();
+                            }
+                            emit_token!(self, Token::EofToken);
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::CharacterReferenceInDataState => {
+                    _ = self.consume_character_reference(None, false);
+                    self.state = State::DataState;
+                }
+                State::RcDataState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('&') => {
+                            self.state = State::CharacterReferenceInRcDataState
+                        },
+                        Element::Utf8('<') => self.state = State::RcDataLessThanSignState,
+                        Element::Eof => {
+                            if self.has_consumed_data() {
+                                emit_token!(self, Token::TextToken { value: self.get_consumed_str().clone() });
+                                self.clear_consume_buffer();
+                            }
+                            emit_token!(self, Token::EofToken);
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.consume(CHAR_REPLACEMENT);
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::CharacterReferenceInRcDataState => {
+                    // consume character reference
+                    _ = self.consume_character_reference(None, false);
+                    self.state = State::RcDataState;
+                }
+                State::RawTextState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('<') => self.state = State::RawTextLessThanSignState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.consume(CHAR_REPLACEMENT);
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                        },
+                        Element::Eof => {
+                            // EOF
+                            if self.has_consumed_data() {
+                                emit_token!(self, Token::TextToken { value: self.get_consumed_str() });
+                                self.clear_consume_buffer();
+                            }
+                            emit_token!(self, Token::EofToken);
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::ScriptDataState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('<') => self.state = State::ScriptDataLessThenSignState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            if self.has_consumed_data() {
+                                emit_token!(self, Token::TextToken { value: self.get_consumed_str().clone() });
+                                self.clear_consume_buffer();
+                            }
+                            emit_token!(self, Token::EofToken);
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::PlaintextState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            if self.has_consumed_data() {
+                                emit_token!(self, Token::TextToken { value: self.get_consumed_str().clone() });
+                                self.clear_consume_buffer();
+                            }
+                            emit_token!(self, Token::EofToken);
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::TagOpenState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('!') => self.state = State::MarkupDeclarationOpenState,
+                        Element::Utf8('/') => self.state = State::EndTagOpenState,
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_token = Some(Token::StartTagToken{
+                                name: "".into(),
+                                is_self_closing: false,
+                                attributes: vec![],
+                            });
+
+                            add_to_token_name!(self, to_lowercase!(ch));
+                            self.state = State::TagNameState;
+                        },
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.current_token = Some(Token::StartTagToken{
+                                name: "".into(),
+                                is_self_closing: false,
+                                attributes: vec![],
+                            });
+
+                            add_to_token_name!(self, ch);
+                            self.state = State::TagNameState;
+                        }
+                        Element::Utf8('?') => {
+                            self.current_token = Some(Token::CommentToken{
+                                value: "".into(),
+                            });
+                            self.parse_error(ParserError::UnexpectedQuestionMarkInsteadOfTagName);
+                            self.stream.unread();
+                            self.state = State::BogusCommentState;
                         }
-                    };
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofBeforeTagName);
+                            self.consume('<');
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.parse_error(ParserError::InvalidFirstCharacterOfTagName);
+                            self.stream.unread();
+                            self.consume('<');
+                            self.state = State::DataState;
+                        }
+                    }
+                }
+                State::EndTagOpenState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+
+                            add_to_token_name!(self, to_lowercase!(ch));
+                            self.state = State::TagNameState;
+                        },
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+
+                            add_to_token_name!(self, ch);
+                            self.state = State::TagNameState;
+                        },
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingEndTagName);
+                            self.state = State::DataState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofBeforeTagName);
+                            self.consume('<');
+                            self.consume('/');
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.parse_error(ParserError::InvalidFirstCharacterOfTagName);
+
+                            self.current_token = Some(Token::CommentToken{
+                                value: "".into(),
+                            });
+                            self.stream.unread();
+                            self.state = State::BogusCommentState;
+                        }
+                    }
+                }
+                State::TagNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BeforeAttributeNameState,
+                        Element::Utf8('/') => self.state = State::SelfClosingStartState,
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => add_to_token_name!(self, to_lowercase!(ch)),
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_to_token_name!(self, CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => add_to_token_name!(self, c.utf8()),
+                    }
+                }
+                State::RcDataLessThanSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('/') => {
+                            self.temporary_buffer = vec![];
+                            self.state = State::RcDataEndTagOpenState;
+                        },
+                        _ => {
+                            self.consume('<');
+                            self.stream.unread();
+                            self.state = State::RcDataState;
+                        },
+                    }
+                }
+                State::RcDataEndTagOpenState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                            self.state = State::RcDataEndTagNameState;
+                        },
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+                            self.temporary_buffer.push(ch);
+                            self.state = State::RcDataEndTagNameState;
+                        }
+                        _ => {
+                            self.consume('<');
+                            self.consume('/');
+                            self.stream.unread();
+                            self.state = State::RcDataState;
+                        },
+                    }
+                }
+                State::RcDataEndTagNameState => {
+                    let c = read_char!(self);
+
+                    // Several arms below end in the same "anything else" fallback, so they only set this flag and the fallback runs once after the match.
+                    let mut consume_anything_else = false;
 
                     match c {
-                        '&' => self.state = State::CharacterReferenceInDataState,
-                        '<' => self.state = State::TagOpenState,
-                        '\u{0000}' => {
-                            self.parse_error("NUL encountered in stream");
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::BeforeAttributeNameState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('/') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::SelfClosingStartState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('>') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                let s: String = self.temporary_buffer.iter().collect::<String>();
+                                self.set_name_in_current_token(s);
+
+                                self.last_start_token = String::new();
+                                emit_current_token!(self);
+                                self.state = State::DataState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                        }
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.temporary_buffer.push(ch);
                         }
-                        _ => self.consume(c),
+                        _ => {
+                            consume_anything_else = true;
+                        },
+                    }
+
+                    if consume_anything_else {
+                        self.consume('<');
+                        self.consume('/');
+                        for c in self.temporary_buffer.clone() {
+                            self.consume(c);
+                        }
+                        self.temporary_buffer.clear();
+
+                        self.stream.unread();
+                        self.state = State::RcDataState;
                     }
                 }
-                State::CharacterReferenceInDataState => {
-                    // consume character reference
-                    self.consume_character_reference(None, false);
-                    self.state = State::DataState;
+                State::RawTextLessThanSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('/') => {
+                            self.temporary_buffer = vec![];
+                            self.state = State::RawTextEndTagOpenState;
+                        },
+                        _ => {
+                            self.consume('<');
+                            self.stream.unread();
+                            self.state = State::RawTextState;
+                        },
+                    }
+                }
+                State::RawTextEndTagOpenState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+                            // add_to_token_name!(self, to_lowercase!(ch));
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                            self.state = State::RawTextEndTagNameState;
+                        },
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.current_token = Some(Token::EndTagToken{
+                                name: "".into(),
+                            });
+                            // add_to_token_name!(self, ch);
+                            self.temporary_buffer.push(ch);
+                            self.state = State::RawTextEndTagNameState;
+                        }
+                        _ => {
+                            self.consume('<');
+                            self.consume('/');
+                            self.stream.unread();
+                            self.state = State::RawTextState;
+                        },
+                    }
+                }
+                State::RawTextEndTagNameState => {
+                    let c = read_char!(self);
+
+                    // Several arms below end in the same "anything else" fallback, so they only set this flag and the fallback runs once after the match.
+                    let mut consume_anything_else = false;
+
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::BeforeAttributeNameState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('/') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::SelfClosingStartState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('>') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                let s: String = self.temporary_buffer.iter().collect::<String>();
+                                self.set_name_in_current_token(s);
+                                self.last_start_token = String::new();
+                                emit_current_token!(self);
+                                self.state = State::DataState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            // add_to_token_name!(self, to_lowercase!(ch));
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                        }
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            // add_to_token_name!(self, ch);
+                            self.temporary_buffer.push(ch);
+                        }
+                        _ => {
+                            consume_anything_else = true;
+                        },
+                    }
+
+                    if consume_anything_else {
+                        self.consume('<');
+                        self.consume('/');
+                        for c in self.temporary_buffer.clone() {
+                            self.consume(c);
+                        }
+                        self.temporary_buffer.clear();
+
+                        self.stream.unread();
+                        self.state = State::RawTextState;
+                    }
+                }
+                State::ScriptDataLessThenSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('/') => {
+                            self.temporary_buffer = vec![];
+                            self.state = State::ScriptDataEndTagOpenState;
+                        },
+                        Element::Utf8('!') => {
+                            self.consume('<');
+                            self.consume('!');
+                            self.state = State::ScriptDataEscapeStartState;
+                        },
+                        _ => {
+                            self.consume('<');
+                            self.stream.unread();
+                            self.state = State::ScriptDataState;
+                        },
+                    }
+                }
+                State::ScriptDataEndTagOpenState => {
+                    let c = read_char!(self);
+                    if c.is_eof() {
+                        self.consume('<');
+                        self.consume('/');
+                        self.stream.unread();
+                        self.state = State::ScriptDataState;
+                        continue;
+                    }
+
+                    if c.utf8().is_ascii_alphabetic() {
+                        self.current_token = Some(Token::EndTagToken{
+                            name: "".into(),
+                        });
+
+                        self.stream.unread();
+                        self.state = State::ScriptDataEndTagNameState;
+                    } else {
+                        self.consume('<');
+                        self.consume('/');
+                        self.stream.unread();
+                        self.state = State::ScriptDataState;
+                    }
+                }
+                State::ScriptDataEndTagNameState => {
+                    let c = read_char!(self);
+
+                    // Several arms below end in the same "anything else" fallback, so they only set this flag and the fallback runs once after the match.
+                    let mut consume_anything_else = false;
+
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::BeforeAttributeNameState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('/') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::SelfClosingStartState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('>') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                let s: String = self.temporary_buffer.iter().collect::<String>();
+                                self.set_name_in_current_token(s);
+
+                                self.last_start_token = String::new();
+                                emit_current_token!(self);
+                                self.state = State::DataState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                        }
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.temporary_buffer.push(ch);
+                        }
+                        _ => {
+                            consume_anything_else = true;
+                        },
+                    }
+
+                    if consume_anything_else {
+                        self.consume('<');
+                        self.consume('/');
+                        for c in self.temporary_buffer.clone() {
+                            self.consume(c);
+                        }
+                        self.temporary_buffer.clear();
+
+                        self.stream.unread();
+                        self.state = State::ScriptDataState;
+                    }
+                }
+                State::ScriptDataEscapeStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                            self.state = State::ScriptDataEscapeStartDashState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataState;
+                        },
+                    }
+                }
+                State::ScriptDataEscapeStartDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                            self.state = State::ScriptDataEscapedDashDashState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataState;
+                        },
+                    }
+                }
+                State::ScriptDataEscapedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                            self.state = State::ScriptDataEscapedDashState;
+                        },
+                        Element::Utf8('<') => {
+                            self.state = State::ScriptDataEscapedLessThanSignState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.consume(c.utf8());
+                        },
+                    }
+                }
+                State::ScriptDataEscapedDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                            self.state = State::ScriptDataEscapedDashDashState;
+                        },
+                        Element::Utf8('<') => {
+                            self.state = State::ScriptDataEscapedLessThanSignState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                            self.state = State::ScriptDataEscapedState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataEscapedState;
+                        },
+                    }
+                }
+                State::ScriptDataEscapedDashDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                        },
+                        Element::Utf8('<') => {
+                            self.state = State::ScriptDataEscapedLessThanSignState;
+                        },
+                        Element::Utf8('>') => {
+                            self.consume('>');
+                            self.state = State::ScriptDataState;
+                        }
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                            self.state = State::ScriptDataEscapedState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataEscapedState;
+                        },
+                    }
+                }
+                State::ScriptDataEscapedLessThanSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('/') => {
+                            self.temporary_buffer = vec![];
+                            self.state = State::ScriptDataEscapedEndTagOpenState;
+                        },
+                        _ => {
+                            if c.is_utf8() && c.utf8().is_ascii_alphabetic() {
+                                self.temporary_buffer = vec![];
+                                self.consume('<');
+                                self.stream.unread();
+                                self.state = State::ScriptDataDoubleEscapeStartState;
+                                continue;
+                            }
+                            // anything else
+                            self.consume('<');
+                            self.stream.unread();
+                            self.state = State::ScriptDataEscapedState;
+                        },
+                    }
+                }
+                State::ScriptDataEscapedEndTagOpenState => {
+                    let c = read_char!(self);
+
+                    if c.is_utf8() && c.utf8().is_ascii_alphabetic() {
+                        self.current_token = Some(Token::EndTagToken{
+                            name: "".into(),
+                        });
+
+                        self.stream.unread();
+                        self.state = State::ScriptDataEscapedEndTagNameState;
+                        continue;
+                    }
+
+                    // anything else
+                    self.consume('<');
+                    self.consume('/');
+                    self.stream.unread();
+                    self.state = State::ScriptDataEscapedState;
                 }
-                State::RcDataState => {}
-                State::CharacterReferenceInRcDataState => {}
-                State::RawTextState => {}
-                State::ScriptDataState => {}
-                State::PlaintextState => {}
-                State::TagOpenState => {}
-                State::EndTagOpenState => {}
-                State::TagNameState => {}
-                State::RcDataLessThanSignState => {}
-                State::RcDataEndTagOpenState => {}
-                State::RcDataEndTagNameState => {}
-                State::RawTextLessThanSignState => {}
-                State::RawTextEndTagOpenState => {}
-                State::RawTextEndTagNameState => {}
-                State::ScriptDataLessThenSignState => {}
-                State::ScriptDataEndTagOpenState => {}
-                State::ScriptDataEndTagNameState => {}
-                State::ScriptDataEscapeStartState => {}
-                State::ScriptDataEscapeStartDashState => {}
-                State::ScriptDataEscapedState => {}
-                State::ScriptDataEscapedDashState => {}
-                State::ScriptDataEscapedLessThanSignState => {}
-                State::ScriptDataEscapedEndTagOpenState => {}
-                State::ScriptDataEscapedEndTagNameState => {}
-                State::ScriptDataDoubleEscapeStartState => {}
-                State::ScriptDataDoubleEscapedState => {}
-                State::ScriptDataDoubleEscapedDashState => {}
-                State::ScriptDataDoubleEscapedDashDashState => {}
-                State::ScriptDataDoubleEscapedLessThanSignState => {}
-                State::ScriptDataDoubleEscapeEndState => {}
-                State::BeforeAttributeNameState => {}
-                State::AttributeNameState => {}
-                State::BeforeAttributeValueState => {}
-                State::AttributeValueDoubleQuotedState => {}
-                State::AttributeValueSingleQuotedState => {}
-                State::AttributeValueUnquotedState => {}
-                State::CharacterReferenceInAttributeValueState => {}
-                State::AfterAttributeValueQuotedState => {}
-                State::SelfClosingStartState => {}
-                State::BogusCommentState => {}
-                State::MarkupDeclarationOpenState => {}
-                State::CommentStartState => {}
-                State::CommentStartDashState => {}
-                State::CommentState => {}
-                State::CommentEndDashState => {}
-                State::CommentEndState => {}
-                State::CommentEndBangState => {}
-                State::DocTypeState => {}
-                State::BeforeDocTypeNameState => {}
-                State::DocTypeNameState => {}
-                State::AfterDocTypeNameState => {}
-                State::AfterDocTypePublicKeywordState => {}
-                State::BeforeDocTypePublicIdentifierState => {}
-                State::DocTypePublicIdentifierDoubleQuotedState => {}
-                State::DocTypePublicIdentifierSingleQuotedState => {}
-                State::AfterDoctypePublicIdentifierState => {}
-                State::BetweenDocTypePublicAndSystemIdentifiersState => {}
-                State::AfterDocTypeSystemKeywordState => {}
-                State::BeforeDocTypeSystemIdentifiedState => {}
-                State::DocTypeSystemIdentifierDoubleQuotedState => {}
-                State::DocTypeSystemIdentifierSingleQuotedState => {}
-                State::AfterDocTypeSystemIdentifiedState => {}
-                State::BogusDocTypeState => {}
-                State::CDataSectionState => {}
-            }
-        }
-
-        // return Token::Error{error: Error::EndOfStream, span: String::from("")}
+                State::ScriptDataEscapedEndTagNameState => {
+                    let c = read_char!(self);
+
+                    // Several arms below end in the same "anything else" fallback, so they only set this flag and the fallback runs once after the match.
+                    let mut consume_anything_else = false;
+
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::BeforeAttributeNameState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('/') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                self.state = State::SelfClosingStartState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8('>') => {
+                            if self.is_appropriate_end_token(&self.temporary_buffer) {
+                                let s: String = self.temporary_buffer.iter().collect::<String>();
+                                self.set_name_in_current_token(s);
+
+                                self.last_start_token = String::new();
+                                emit_current_token!(self);
+                                self.state = State::DataState;
+                            } else {
+                                consume_anything_else = true;
+                            }
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                        }
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.temporary_buffer.push(ch);
+                        }
+                        _ => {
+                            consume_anything_else = true;
+                        },
+                    }
+
+                    if consume_anything_else {
+                        self.consume('<');
+                        self.consume('/');
+                        for c in self.temporary_buffer.clone() {
+                            self.consume(c);
+                        }
+                        self.temporary_buffer.clear();
+
+                        self.stream.unread();
+                        self.state = State::ScriptDataEscapedState;
+                    }
+                }
+                State::ScriptDataDoubleEscapeStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) |
+                        Element::Utf8('/') |
+                        Element::Utf8('>') => {
+                            if self.temporary_buffer.iter().collect::<String>().eq("script") {
+                                self.state = State::ScriptDataDoubleEscapedState;
+                            } else {
+                                self.state = State::ScriptDataEscapedState;
+                            }
+                            self.consume(c.utf8());
+                        }
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.temporary_buffer.push(to_lowercase!(ch));
+                            self.consume(ch);
+                        },
+                        Element::Utf8(ch @ 'a'..='z') => {
+                            self.temporary_buffer.push(ch);
+                            self.consume(ch);
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataEscapedState;
+                        }
+                    }
+                },
+                State::ScriptDataDoubleEscapedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.consume('-');
+                            self.state = State::ScriptDataDoubleEscapedDashState;
+                        }
+                        Element::Utf8('<') => {
+                            self.consume('<');
+                            self.state = State::ScriptDataDoubleEscapedLessThanSignState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        }
+                        _ => self.consume(c.utf8()),
+                    }
+                }
+                State::ScriptDataDoubleEscapedDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::ScriptDataDoubleEscapedDashDashState;
+                            self.consume('-');
+                        }
+                        Element::Utf8('<') => {
+                            self.state = State::ScriptDataDoubleEscapedLessThanSignState;
+                            self.consume('<');
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.consume(c.utf8());
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        },
+                    }
+                }
+                State::ScriptDataDoubleEscapedDashDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => self.consume('-'),
+                        Element::Utf8('<') => {
+                            self.consume('<');
+                            self.state = State::ScriptDataDoubleEscapedLessThanSignState;
+                        },
+                        Element::Utf8('>') => {
+                            self.consume('>');
+                            self.state = State::ScriptDataState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.consume(CHAR_REPLACEMENT);
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInScriptHtmlCommentLikeText);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.consume(c.utf8());
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        },
+                    }
+                }
+                State::ScriptDataDoubleEscapedLessThanSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('/') => {
+                            self.temporary_buffer = vec![];
+                            self.consume('/');
+                            self.state = State::ScriptDataDoubleEscapeEndState;
+                        }
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        },
+                    }
+                }
+                // Script data double escape end state: buffers ASCII letters until a delimiter arrives.
+                State::ScriptDataDoubleEscapeEndState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) |
+                        Element::Utf8('/') |
+                        Element::Utf8('>') => {
+                            // Compare in place instead of collecting the buffer into a String.
+                            let is_script = self.temporary_buffer.iter().copied().eq("script".chars());
+                            self.state = if is_script {
+                                State::ScriptDataEscapedState
+                            } else {
+                                State::ScriptDataDoubleEscapedState
+                            };
+                            self.consume(c.utf8());
+                        }
+                        Element::Utf8(ch @ ('A'..='Z' | 'a'..='z')) => {
+                            // Buffer the lowercased letter; the consumed character keeps its case.
+                            self.temporary_buffer.push(ch.to_ascii_lowercase());
+                            self.consume(ch);
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::ScriptDataDoubleEscapedState;
+                        }
+                    }
+                }
+                State::BeforeAttributeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // Ignore character
+                        },
+                        Element::Utf8('/') | Element::Utf8('>') | Element::Eof => {
+                            self.stream.unread();
+                            self.state = State::AfterAttributeNameState;
+                        },
+                        Element::Utf8('=') => {
+                            self.parse_error(ParserError::UnexpectedEqualsSignBeforeAttributeName);
+
+                            self.store_and_clear_current_attribute();
+                            self.current_attr_name.push(c.utf8());
+
+                            self.state = State::AttributeNameState;
+                        }
+                        _ => {
+                            // Store an existing attribute if any and clear
+                            self.store_and_clear_current_attribute();
+
+                            self.stream.unread();
+                            self.state = State::AttributeNameState;
+                        },
+                    }
+                }
+                State::AttributeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) |
+                        Element::Utf8('/') |
+                        Element::Utf8('>') |
+                        Element::Eof => {
+                            if self.attr_already_exists() {
+                                self.parse_error(ParserError::DuplicateAttribute);
+                            }
+                            self.stream.unread();
+
+                            self.state = State::AfterAttributeNameState
+                        },
+                        Element::Utf8('=') => {
+                            if self.attr_already_exists() {
+                                self.parse_error(ParserError::DuplicateAttribute);
+                            }
+                            self.state = State::BeforeAttributeValueState
+                        },
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_attr_name.push(to_lowercase!(ch));
+                        },
+                        Element::Utf8(CHAR_NUL)  => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.current_attr_name.push(CHAR_REPLACEMENT);
+                        },
+                        Element::Utf8('"') | Element::Utf8('\'') | Element::Utf8('<') => {
+                            self.parse_error(ParserError::UnexpectedCharacterInAttributeName);
+                            self.current_attr_name.push(c.utf8());
+                        },
+                        _ => self.current_attr_name.push(c.utf8()),
+                    }
+                }
+                State::AfterAttributeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // Ignore
+                        },
+                        Element::Utf8('/') => self.state = State::SelfClosingStartState,
+                        Element::Utf8('=') => self.state = State::BeforeAttributeValueState,
+                        Element::Utf8('>') => {
+                            self.store_and_clear_current_attribute();
+                            self.add_stored_attributes_to_current_token();
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.store_and_clear_current_attribute();
+                            self.stream.unread();
+                            self.state = State::AttributeNameState;
+                        },
+                    }
+                },
+                State::BeforeAttributeValueState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // Ignore
+                        },
+                        Element::Utf8('"') => self.state = State::AttributeValueDoubleQuotedState,
+                        Element::Utf8('\'') => {
+                            self.state = State::AttributeValueSingleQuotedState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingAttributeValue);
+
+                            self.store_and_clear_current_attribute();
+                            self.add_stored_attributes_to_current_token();
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::AttributeValueUnquotedState;
+                        },
+                    }
+                }
+                State::AttributeValueDoubleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('"') => self.state = State::AfterAttributeValueQuotedState,
+                        Element::Utf8('&') => _ = self.consume_character_reference(Some(Element::Utf8('"')), true),
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.current_attr_value.push(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.current_attr_value.push(c.utf8());
+                        },
+                    }
+                }
+                State::AttributeValueSingleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('\'') => self.state = State::AfterAttributeValueQuotedState,
+                        Element::Utf8('&') => _ = self.consume_character_reference(Some(Element::Utf8('\'')), true),
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.current_attr_value.push(CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.current_attr_value.push(c.utf8());
+                        },
+                    }
+                }
+                State::AttributeValueUnquotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            self.state = State::BeforeAttributeNameState;
+                        },
+                        Element::Utf8('&') => _ = self.consume_character_reference(Some(Element::Utf8('>')), true),
+                        Element::Utf8('>') => {
+                            self.store_and_clear_current_attribute();
+                            self.add_stored_attributes_to_current_token();
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.current_attr_value.push(CHAR_REPLACEMENT);
+                        },
+                        Element::Utf8('"') | Element::Utf8('\'') | Element::Utf8('<') | Element::Utf8('=') | Element::Utf8('`') => {
+                            self.parse_error(ParserError::UnexpectedCharacterInUnquotedAttributeValue);
+                            self.current_attr_value.push(c.utf8());
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.current_attr_value.push(c.utf8());
+                        },
+                    }
+
+                }
+                // State::CharacterReferenceInAttributeValueState => {}
+                State::AfterAttributeValueQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BeforeAttributeNameState,
+                        Element::Utf8('/') => self.state = State::SelfClosingStartState,
+                        Element::Utf8('>') => {
+                            self.store_and_clear_current_attribute();
+                            self.add_stored_attributes_to_current_token();
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.parse_error(ParserError::MissingWhitespaceBetweenAttributes);
+                            self.stream.unread();
+                            self.state = State::BeforeAttributeNameState;
+                        },
+                    }
+                }
+                State::SelfClosingStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('>') => {
+                            self.set_is_closing_in_current_token(true);
+                            self.store_and_clear_current_attribute();
+                            self.add_stored_attributes_to_current_token();
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInTag);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            self.parse_error(ParserError::UnexpectedSolidusInTag);
+                            self.stream.unread();
+                            self.state = State::BeforeAttributeNameState;
+                        },
+                    }
+                }
+                State::BogusCommentState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Eof => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_to_token_value!(self, CHAR_REPLACEMENT);
+                        }
+                        _ => {
+                            add_to_token_value!(self, c.utf8());
+                        },
+                    }
+                }
+                // Markup declaration open state: peeks ahead for "--", "DOCTYPE" or "[CDATA[" before reading.
+                State::MarkupDeclarationOpenState => {
+                    if self.stream.look_ahead_slice(2) == "--" {
+                        self.current_token = Some(Token::CommentToken{
+                            value: "".into(),
+                        });
+                        // Skip the two -- signs
+                        self.stream.seek(SeekCur, 2);
+
+                        self.state = State::CommentStartState;
+                        continue;
+                    }
+                    // The keyword is matched ASCII case-insensitively, without an upper-cased copy.
+                    if self.stream.look_ahead_slice(7).eq_ignore_ascii_case("DOCTYPE") {
+                        self.stream.seek(SeekCur, 7);
+                        self.state = State::DocTypeState;
+                        continue;
+                    }
+
+                    if self.stream.look_ahead_slice(7) == "[CDATA[" {
+                        self.stream.seek(SeekCur, 7);
+
+                        // @TODO: If there is an adjusted current node and it is not an element in the HTML namespace,
+                        // then switch to the CDATA section state. Otherwise, this is a cdata-in-html-content parse error.
+                        // Create a comment token whose data is the "[CDATA[" string. Switch to the bogus comment state.
+                        self.parse_error(ParserError::CdataInHtmlContent);
+                        self.current_token = Some(Token::CommentToken{
+                            value: "[CDATA[".into(),
+                        });
+
+                        self.state = State::BogusCommentState;
+                        continue;
+                    }
+                    // Nothing matched: seek one ahead only while the error is raised, then unread again.
+                    self.stream.seek(SeekCur, 1);
+                    self.parse_error(ParserError::IncorrectlyOpenedComment);
+                    self.stream.unread();
+                    self.current_token = Some(Token::CommentToken{
+                        value: "".into(),
+                    });
+
+                    self.state = State::BogusCommentState;
+                }
+                State::CommentStartState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::CommentStartDashState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptClosingOfEmptyComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        },
+                    }
+                }
+                State::CommentStartDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::CommentEndState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptClosingOfEmptyComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        _ => {
+                            add_to_token_value!(self, '-');
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        },
+                    }
+                }
+                State::CommentState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('<') => {
+                            add_to_token_value!(self, c.utf8());
+                            self.state = State::CommentLessThanSignState;
+                        }
+                        Element::Utf8('-') => self.state = State::CommentEndDashState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_to_token_value!(self, CHAR_REPLACEMENT);
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            add_to_token_value!(self, c.utf8());
+                        },
+                    }
+                }
+                State::CommentLessThanSignState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('!') => {
+                            add_to_token_value!(self, c.utf8());
+                            self.state = State::CommentLessThanSignBangState;
+                        },
+                        Element::Utf8('<') => {
+                            add_to_token_value!(self, c.utf8());
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        },
+                    }
+                },
+                State::CommentLessThanSignBangState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::CommentLessThanSignBangDashState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        },
+                    }
+                },
+                State::CommentLessThanSignBangDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::CommentLessThanSignBangDashDashState;
+                        },
+                        _ => {
+                            self.stream.unread();
+                            self.state = State::CommentEndDashState;
+                        },
+                    }
+                },
+                State::CommentLessThanSignBangDashDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Eof | Element::Utf8('>') => {
+                            self.stream.unread();
+                            self.state = State::CommentEndState;
+                        },
+                        _ => {
+                            self.parse_error(ParserError::NestedComment);
+                            self.stream.unread();
+                            self.state = State::CommentEndState;
+                        },
+                    }
+                },
+                State::CommentEndDashState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            self.state = State::CommentEndState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            add_to_token_value!(self, '-');
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        },
+                    }
+                }
+                State::CommentEndState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Utf8('!') => self.state = State::CommentEndBangState,
+                        Element::Utf8('-') => add_to_token_value!(self, '-'),
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            add_to_token_value!(self, '-');
+                            add_to_token_value!(self, '-');
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        }
+                    }
+                }
+                State::CommentEndBangState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('-') => {
+                            add_to_token_value!(self, '-');
+                            add_to_token_value!(self, '-');
+                            add_to_token_value!(self, '!');
+
+                            self.state = State::CommentEndDashState;
+                        },
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::IncorrectlyClosedComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInComment);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            add_to_token_value!(self, '-');
+                            add_to_token_value!(self, '-');
+                            add_to_token_value!(self, '!');
+                            self.stream.unread();
+                            self.state = State::CommentState;
+                        }
+                    }
+                }
+                State::DocTypeState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BeforeDocTypeNameState,
+                        Element::Utf8('>') => {
+                            self.stream.unread();
+                            self.state = State::BeforeDocTypeNameState;
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+
+                            emit_token!(self, Token::DocTypeToken{
+                                name: None,
+                                force_quirks: true,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingWhitespaceBeforeDoctypeName);
+                            self.stream.unread();
+                            self.state = State::BeforeDocTypeNameState;
+                        }
+                    }
+                }
+                State::BeforeDocTypeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // ignore
+                        }
+                        Element::Utf8(ch @ 'A'..='Z') => {
+                            self.current_token = Some(Token::DocTypeToken{
+                                name: None,
+                                force_quirks: false,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            add_to_token_name!(self, to_lowercase!(ch));
+                            self.state = State::DocTypeNameState;
+                        }
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            self.current_token = Some(Token::DocTypeToken{
+                                name: None,
+                                force_quirks: false,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            add_to_token_name!(self, CHAR_REPLACEMENT);
+                            self.state = State::DocTypeNameState;
+                        },
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingDoctypeName);
+                            emit_token!(self, Token::DocTypeToken{
+                                name: None,
+                                force_quirks: true,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            self.state = State::DataState;
+                        },
+
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+
+                            emit_token!(self, Token::DocTypeToken{
+                                name: None,
+                                force_quirks: true,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.current_token = Some(Token::DocTypeToken{
+                                name: None,
+                                force_quirks: false,
+                                pub_identifier: None,
+                                sys_identifier: None,
+                            });
+
+                            add_to_token_name!(self, c.utf8());
+                            self.state = State::DocTypeNameState;
+                        }
+                    }
+                }
+                State::DocTypeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::AfterDocTypeNameState,
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Utf8(ch @ 'A'..='Z') => add_to_token_name!(self, to_lowercase!(ch)),
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_to_token_name!(self, CHAR_REPLACEMENT);
+                        },
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => add_to_token_name!(self, c.utf8()),
+                    }
+                }
+                // After DOCTYPE name state: handles `>`, EOF, or a PUBLIC / SYSTEM keyword lookup.
+                State::AfterDocTypeNameState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {} // whitespace is ignored
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            // Keywords match ASCII case-insensitively only; Unicode upper-casing would
+                            // also accept look-alikes (U+0131 dotless i as I, U+017F long s as S).
+                            self.stream.unread();
+                            if self.stream.look_ahead_slice(6).eq_ignore_ascii_case("PUBLIC") {
+                                self.stream.seek(SeekCur, 6);
+                                self.state = State::AfterDocTypePublicKeywordState;
+                                continue;
+                            }
+                            if self.stream.look_ahead_slice(6).eq_ignore_ascii_case("SYSTEM") {
+                                self.stream.seek(SeekCur, 6);
+                                self.state = State::AfterDocTypeSystemKeywordState;
+                                continue;
+                            }
+                            // Seek past the unread character while the error is raised, then step back.
+                            self.stream.seek(SeekCur, 1);
+                            self.parse_error(ParserError::InvalidCharacterSequenceAfterDoctypeName);
+                            self.stream.seek(SeekCur, -1);
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::AfterDocTypePublicKeywordState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BeforeDocTypePublicIdentifierState,
+                        Element::Utf8('"') => {
+                            self.parse_error(ParserError::MissingWhitespaceAfterDoctypePublicKeyword);
+                            set_public_identifier!(self, String::new());
+                            self.state = State::DocTypePublicIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            self.parse_error(ParserError::MissingWhitespaceAfterDoctypePublicKeyword);
+                            set_public_identifier!(self, String::new());
+                            self.state = State::DocTypePublicIdentifierSingleQuotedState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingDoctypePublicIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypePublicIdentifier);
+                            self.stream.unread();
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::BeforeDocTypePublicIdentifierState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // ignore
+                        },
+                        Element::Utf8('"') => {
+                            set_public_identifier!(self, String::new());
+                            self.state = State::DocTypePublicIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            set_public_identifier!(self, String::new());
+                            self.state = State::DocTypePublicIdentifierSingleQuotedState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingDoctypePublicIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.stream.unread();
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypePublicIdentifier);
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::DocTypePublicIdentifierDoubleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('"') => self.state = State::AfterDoctypePublicIdentifierState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_public_identifier!(self, CHAR_REPLACEMENT);
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptDoctypePublicIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => add_public_identifier!(self, c.utf8()),
+                    }
+                }
+                State::DocTypePublicIdentifierSingleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('\'') => self.state = State::AfterDoctypePublicIdentifierState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_public_identifier!(self, CHAR_REPLACEMENT);
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptDoctypePublicIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => add_public_identifier!(self, c.utf8()),
+                    }
+                }
+                State::AfterDoctypePublicIdentifierState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BetweenDocTypePublicAndSystemIdentifiersState,
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Utf8('"') => {
+                            self.parse_error(ParserError::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            self.parse_error(ParserError::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierSingleQuotedState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypeSystemIdentifier);
+                            self.stream.unread();
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::BetweenDocTypePublicAndSystemIdentifiersState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // ignore
+                        },
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Utf8('"') => {
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierSingleQuotedState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypeSystemIdentifier);
+                            self.stream.unread();
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::AfterDocTypeSystemKeywordState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => self.state = State::BeforeDocTypeSystemIdentifierState,
+                        Element::Utf8('"') => {
+                            self.parse_error(ParserError::MissingWhitespaceAfterDoctypeSystemKeyword);
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            self.parse_error(ParserError::MissingWhitespaceAfterDoctypeSystemKeyword);
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierSingleQuotedState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingDoctypeSystemIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypeSystemIdentifier);
+                            self.stream.unread();
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::BeforeDocTypeSystemIdentifierState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // ignore
+                        },
+                        Element::Utf8('"') => {
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierDoubleQuotedState;
+                        }
+                        Element::Utf8('\'') => {
+                            set_system_identifier!(self, String::new());
+                            self.state = State::DocTypeSystemIdentifierSingleQuotedState;
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::MissingDoctypeSystemIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::MissingQuoteBeforeDoctypeSystemIdentifier);
+                            self.stream.unread();
+                            self.set_quirks_mode(true);
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+                }
+                State::DocTypeSystemIdentifierDoubleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('"') => self.state = State::AfterDocTypeSystemIdentifierState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_system_identifier!(self, CHAR_REPLACEMENT);
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptDoctypeSystemIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => add_system_identifier!(self, c.utf8()),
+                    }
+
+                }
+                State::DocTypeSystemIdentifierSingleQuotedState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('\'') => self.state = State::AfterDocTypeSystemIdentifierState,
+                        Element::Utf8(CHAR_NUL) => {
+                            self.parse_error(ParserError::UnexpectedNullCharacter);
+                            add_system_identifier!(self, CHAR_REPLACEMENT);
+                        }
+                        Element::Utf8('>') => {
+                            self.parse_error(ParserError::AbruptDoctypeSystemIdentifier);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => add_system_identifier!(self, c.utf8()),
+                    }
+
+                }
+                State::AfterDocTypeSystemIdentifierState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(CHAR_TAB) |
+                        Element::Utf8(CHAR_LF) |
+                        Element::Utf8(CHAR_FF) |
+                        Element::Utf8(CHAR_SPACE) => {
+                            // ignore
+                        },
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInDoctype);
+                            self.set_quirks_mode(true);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            self.parse_error(ParserError::UnexpectedCharacterAfterDoctypeSystemIdentifier);
+                            self.stream.unread();
+                            self.state = State::BogusDocTypeState;
+                        }
+                    }
+
+                }
+                State::BogusDocTypeState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8('>') => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        Element::Utf8(CHAR_NUL) => self.parse_error(ParserError::UnexpectedNullCharacter),
+                        Element::Eof => {
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        }
+                        _ => {
+                            // ignore
+                        }
+                    }
+                }
+                State::CDataSectionState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(']') => {
+                            self.state = State::CDataSectionBracketState;
+                        }
+                        Element::Eof => {
+                            self.parse_error(ParserError::EofInCdata);
+                            emit_current_token!(self);
+                            self.state = State::DataState;
+                        },
+                        _ => self.consume(c.utf8()),
+                    }
+                },
+                State::CDataSectionBracketState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(']') => self.state = State::CDataSectionEndState,
+                        _ => {
+                            self.consume(']');
+                            self.stream.unread();
+                            self.state = State::CDataSectionState;
+                        }
+                    }
+                },
+                State::CDataSectionEndState => {
+                    let c = read_char!(self);
+                    match c {
+                        Element::Utf8(']') => self.consume(']'),
+                        Element::Utf8('>') => self.state = State::DataState,
+                        _ => {
+                            self.consume(']');
+                            self.consume(']');
+                            self.stream.unread();
+                            self.state = State::CDataSectionState;
+                        }
+                    }
+                }
+                _ => {
+                    panic!("state {:?} not implemented", self.state);
+                }
+            }
+        }
     }
 
     // Consumes the given char
@@ -140,16 +2164,27 @@ impl<'a> Tokenizer<'a> {
     }
 
     // Consumes the given string
-    pub(crate) fn consume_string(&mut self, s: String) {
+    pub(crate) fn consume_string(&mut self, s: &str) {
         // Add c to the current token data
         for c in s.chars() {
             self.consumed.push(c)
         }
     }
 
+    // Return true when the given end_token matches the stored start token (ie: 'table' matches when last_start_token = 'table')
+    fn is_appropriate_end_token(&self, end_token: &[char]) -> bool {
+        // Compare char-by-char so that no temporary String has to be allocated
+        self.last_start_token.chars().eq(end_token.iter().copied())
+    }
+
     // Return the consumed string as a String
     pub fn get_consumed_str(&self) -> String {
         self.consumed.iter().collect()
+    }
+
+    // Returns true if there is anything in the consume buffer
+    pub fn has_consumed_data(&self) -> bool {
+        !self.consumed.is_empty()
     }
 
     // Clears the current consume buffer
@@ -157,73 +2192,127 @@ impl<'a> Tokenizer<'a> {
         self.consumed.clear()
     }
 
+    // Returns every parse error recorded so far, in the order in which they were added
+    pub fn get_errors(&self) -> &Vec<ParseError> {
+        &self.errors
+    }
+
     // Creates a parser log error message
-    pub(crate) fn parse_error(&mut self, _str: &str) {
+    pub(crate) fn parse_error(&mut self, error: ParserError) {
+        // The previous position is where the error occurred
+        let pos = self.stream.get_previous_position();
+
+        // The same error may be reported again for the same position, so every
+        // (line, col, message) combination is only recorded once. Comparing
+        // against the borrowed message avoids allocating a String per entry.
+        let message = error.as_str();
+        let already_exists = self
+            .errors
+            .iter()
+            .any(|err| err.line == pos.line && err.col == pos.col && err.message == message);
+
+        // Don't add when this error already exists (for this exact position)
+        if already_exists {
+            return;
+        }
+
         // Add to parse log
-        println!("parse_error: {}", _str)
-    }
-}
+        let entry = ParseError {
+            message: message.to_string(),
+            line: pos.line,
+            col: pos.col,
+            offset: pos.offset,
+        };
 
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::html5_parser::token::{Token, TokenTrait, TokenType};
+        self.errors.push(entry);
+    }
 
-    #[test]
-    fn test_tokens() {
-        let t = Token::CommentToken {
-            value: String::from("this is a comment"),
-        };
-        assert_eq!("comment[this is a comment]", t.to_string());
+    // Records the self-closing flag on the current token.
+    //
+    // Only start tags carry the flag; an end tag raises a parse error instead and
+    // every other kind of token is ignored. Panics when there is no current token.
+    fn set_is_closing_in_current_token(&mut self, is_closing: bool) {
+        let token = self.current_token.as_mut().unwrap();
+        if let Token::StartTagToken { is_self_closing, .. } = token {
+            *is_self_closing = is_closing;
+        } else if matches!(token, Token::EndTagToken { .. }) {
+            self.parse_error(ParserError::EndTagWithTrailingSolidus);
+        }
+    }
 
-        let t = Token::TextToken {
-            value: String::from("this is a string"),
-        };
-        assert_eq!("str[this is a string]", t.to_string());
+    // Sets the force_quirks flag of the current token when it is a doctype token;
+    // any other kind of token is left untouched.
+    // Panics when there is no current token.
+    fn set_quirks_mode(&mut self, quirky: bool) {
+        let token = self.current_token.as_mut().unwrap();
+        if let Token::DocTypeToken { force_quirks, .. } = token {
+            *force_quirks = quirky;
+        }
+    }
 
-        let t = Token::StartTagToken {
-            name: String::from("tag"),
-            is_self_closing: true,
-            attributes: Vec::new(),
-        };
-        assert_eq!("starttag[<tag/>]", t.to_string());
 
-        let t = Token::StartTagToken {
-            name: String::from("tag"),
-            is_self_closing: false,
-            attributes: Vec::new(),
-        };
-        assert_eq!("starttag[<tag>]", t.to_string());
+    // Adds a new attribute to the current token when it is a start tag; other
+    // kinds of token are ignored. current_attr_name is cleared either way.
+    //
+    // Panics when there is no current token.
+    fn set_add_attribute_to_current_token(&mut self, name: String, value: String) {
+        let token = self.current_token.as_mut().unwrap();
+        if let Token::StartTagToken { attributes, .. } = token {
+            // name and value are owned, so they are moved in instead of cloned
+            attributes.push((name, value));
+        }
 
-        let t = Token::EndTagToken {
-            name: String::from("tag"),
-        };
-        assert_eq!("endtag[</tag>]", t.to_string());
+        self.current_attr_name.clear()
+    }
 
-        let t = Token::DocTypeToken {
-            name: String::from("html"),
-            force_quirks: true,
-            pub_identifier: Option::from(String::from("foo")),
-            sys_identifier: Option::from(String::from("bar")),
-        };
-        assert_eq!("doctype[<html  FORCE_QUIRKS! foo bar>]", t.to_string());
+    // Sets the given name into the current token, which must be a start or end tag.
+    //
+    // Panics when there is no current token or when it is any other kind of token.
+    fn set_name_in_current_token(&mut self, new_name: String) {
+        match self.current_token.as_mut().unwrap() {
+            // new_name is owned, so it is moved into the token instead of cloned
+            Token::StartTagToken { name, .. } | Token::EndTagToken { name, .. } => {
+                *name = new_name;
+            }
+            _ => panic!("trying to set the name of a non start/end tag token"),
+        }
     }
 
-    #[test]
-    fn test_tokenizer() {
-        let mut is = InputStream::new();
-        is.read_from_str("This code is &copy; 2023 &#x80;", None);
+    // Returns true when current_attrs already holds an attribute named like current_attr_name.
+    fn attr_already_exists(&self) -> bool {
+        self.current_attrs.iter().any(|(name, ..)| *name == self.current_attr_name)
+    }
 
-        let mut tkznr = Tokenizer::new(&mut is);
+    // Moves the current attribute name and value onto the current_attrs stack (if the name is non-empty and not stored yet), then empties both
+    fn store_and_clear_current_attribute(&mut self) {
+        if !self.current_attr_name.is_empty() && !self.attr_already_exists() {
+            self.current_attrs.push((std::mem::take(&mut self.current_attr_name), std::mem::take(&mut self.current_attr_value)));
+        }
 
-        let t = tkznr.next_token();
-        assert_eq!(TokenType::TextToken, t.type_of());
+        self.current_attr_name.clear();
+        self.current_attr_value.clear();
+    }
 
-        if let Token::TextToken { value } = t {
-            assert_eq!("This code is © 2023 €", value);
+    // Moves the attributes collected in current_attrs into the current token when
+    // that token is a start tag. Attributes collected for an end tag raise a parse error.
+    fn add_stored_attributes_to_current_token(&mut self) {
+        // Nothing to do without a current token or without stored attributes;
+        // this check also guarantees that the unwrap() below cannot panic.
+        if self.current_token.is_none() || self.current_attrs.is_empty() {
+            return;
         }
 
-        let t = tkznr.next_token();
-        assert_eq!(TokenType::EofToken, t.type_of());
+        match self.current_token.as_mut().unwrap() {
+            Token::EndTagToken { .. } => {
+                // NOTE(review): current_attrs is not cleared on this path - confirm it is reset elsewhere
+                self.parse_error(ParserError::EndTagWithAttributes);
+            }
+            Token::StartTagToken { attributes, .. } => {
+                // append() moves the entries over and leaves current_attrs empty
+                attributes.append(&mut self.current_attrs);
+            }
+            // Every other kind of token is left untouched
+            _ => {}
+        }
     }
-}
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100755
index 000000000..58d532349
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,2 @@
+#[allow(dead_code)]
+pub mod html5_parser;
diff --git a/src/test_test.rs b/src/test_test.rs
deleted file mode 100644
index 63161c676..000000000
--- a/src/test_test.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-pub struct InputStream {
-}
-
-impl InputStream {
-    pub fn new() -> Self {
-        InputStream {}
-    }
-}
-
-// =======================================================================================
-
-pub struct Token;
-
-impl Token {
-    fn to_string(&self) -> String {
-        return String::from("token");
-    }
-}
-
-// =======================================================================================
-
-pub struct Tokenizer<'a> {
-    pub stream: &'a mut InputStream,
-    pub emitter: &'a mut dyn Emitter,
-}
-
-impl<'a> Tokenizer<'a> {
-    pub fn new(input: &'a mut InputStream, emitter: &'a mut dyn Emitter) -> Self {
-        return Tokenizer {
-            stream: input,
-            emitter,
-        }
-    }
-
-    pub fn next_token(&mut self)
-    {
-        let t = Token;
-        self.emitter.emit(t)
-    }
-}
-
-// =======================================================================================
-
-pub struct HtmlParser<'a> {
-    pub tokenizer: &'a mut Tokenizer<'a>,
-}
-
-impl<'a> HtmlParser<'a> {
-    pub fn new(tokenizer: &'a mut Tokenizer<'a>) -> Self {
-        HtmlParser{
-            tokenizer
-        }
-    }
-
-    pub fn get_tokenizer(&mut self) -> &mut Tokenizer<'a> {
-        return self.tokenizer;
-    }
-}
-
-// =======================================================================================
-
-pub trait Emitter {
-    fn emit(&mut self, t: Token);
-}
-
-pub struct StrEmitter {
-    pub output: String
-}
-
-impl StrEmitter {
-    pub fn new() -> Self {
-        StrEmitter {
-            output: String::new(),
-        }
-    }
-
-    fn get_output(&self) -> &String {
-        return &self.output;
-    }
-}
-
-impl Emitter for StrEmitter {
-    fn emit(&mut self, t: Token) {
-        self.output.push_str(&*t.to_string());
-    }
-}
-
-pub struct AppEmitter;
-
-impl AppEmitter {
-    pub fn new() -> Self {
-        AppEmitter
-    }
-}
-
-impl Emitter for AppEmitter {
-    fn emit(&mut self, t: Token) {
-        println!("O [{}]", t.to_string());
-    }
-}
-
-// =======================================================================================
-
-pub fn main() {
-    let mut is = InputStream::new();
-    let mut e = AppEmitter::new();
-    let mut t = Tokenizer::new(&mut is, &mut e);
-
-    let mut p = HtmlParser::new(&mut t);
-
-    p.get_tokenizer().next_token();
-    // println!("Output: {}", e.get_output())
-}