Skip to content

Commit

Permalink
Merge pull request #278 from gosub-browser/http-req-resp
Browse files Browse the repository at this point in the history
Http request / response objects and initial API endpoint
  • Loading branch information
jaytaph authored Nov 25, 2023
2 parents eb5cd50 + 56009fe commit 37f5c5c
Show file tree
Hide file tree
Showing 14 changed files with 848 additions and 12 deletions.
259 changes: 249 additions & 10 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ hickory-resolver = "0.24.0"
simple_logger = "4.2.0"
shared_singleton = "0.1.0"
testing_logger = "0.1.1"
cookie = { version = "0.18.0", features = ["secure", "private"] }
http = "1.0.0"
url = "2.5.0"

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
Expand Down
8 changes: 8 additions & 0 deletions src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,14 @@ impl CharIterator {
result
}

/// Replaces the iterator's byte buffer with a copy of `bytes`.
///
/// The encoding is forced to `e`, falling back to UTF-8 when `e` is `None`,
/// and the iterator is reset afterwards. This method always returns `Ok(())`.
pub fn read_from_bytes(&mut self, bytes: &[u8], e: Option<Encoding>) -> io::Result<()> {
    let encoding = e.unwrap_or(Encoding::UTF8);

    self.u8_buffer = Vec::from(bytes);
    self.force_set_encoding(encoding);
    self.reset();

    Ok(())
}

/// Populates the current buffer with the contents of given file f
pub fn read_from_file(&mut self, mut f: File, e: Option<Encoding>) -> io::Result<()> {
// First we read the u8 bytes into a buffer
Expand Down
4 changes: 2 additions & 2 deletions src/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use std::time::{SystemTime, UNIX_EPOCH};

/// A DNS entry is a mapping of a domain to zero or more IP address mapping
#[derive(Default, Clone, Debug, PartialEq)]
struct DnsEntry {
pub struct DnsEntry {
// domain name
domain: String,
// // Ip type that is stored in this entry (could be Ipv4, IPv6 or Both)
Expand Down Expand Up @@ -103,7 +103,7 @@ trait DnsCache {
fn flush_entry(&mut self, domain: &str);
}

struct Dns {
pub struct Dns {
resolvers: Vec<Box<dyn DnsResolver>>,
}

Expand Down
168 changes: 168 additions & 0 deletions src/engine.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
use crate::bytes::{CharIterator, Confidence, Encoding};
use crate::dns::ResolveType;
use crate::html5::parser::document::{Document, DocumentBuilder, DocumentHandle};
use crate::html5::parser::Html5Parser;
use crate::net::http::headers::Headers;
use crate::net::http::request::Request;
use crate::net::http::response::Response;
use crate::timing::{Timing, TimingTable};
use crate::types::{Error, ParseError, Result};
use cookie::CookieJar;
use core::fmt::Debug;
use std::io::Read;
use url::Url;

/// Default `User-Agent` header value that `fetch_url` sets on the outgoing request
/// before the caller-supplied headers are applied.
const USER_AGENT: &str = "Mozilla/5.0 (compatible; gosub/0.1; +https://gosub.io)";

/// Upper bound, in bytes, on how much of a response body `fetch_url` reads.
const MAX_BYTES: u64 = 10_000_000;

/// Response that is returned from the fetch function.
///
/// Bundles the request that was sent, the raw response, the parsed document
/// and the timings that were measured along the way.
pub struct FetchResponse {
/// Request that has been sent
pub request: Request,
/// Response that has been received
pub response: Response,
/// Document tree that is made from the response
pub document: DocumentHandle,
/// Parse errors that occurred while parsing the document tree
pub parse_errors: Vec<ParseError>,
/// Render tree that is generated from the document tree and css tree.
/// NOTE(review): `fetch_url` in this file only initializes this to an empty string.
pub render_tree: String,
/// Timing table that contains all the timings
pub timings: TimingTable,
}

/// Human-readable dump of a fetch: each part is printed under its own heading,
/// using the `Display` output of the individual components.
impl Debug for FetchResponse {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Request / response / document sections
        writeln!(f, "Request:")?;
        writeln!(f, "{}", self.request)?;

        writeln!(f, "Response:")?;
        writeln!(f, "{}", self.response)?;

        writeln!(f, "Document tree:")?;
        writeln!(f, "{}", self.document)?;

        // One line per parse error, prefixed with its (line:col) position
        writeln!(f, "Parse errors:")?;
        self.parse_errors.iter().try_for_each(|issue| {
            writeln!(f, " ({}:{}) {}", issue.line, issue.col, issue.message)
        })?;

        writeln!(f, "Render tree:")?;
        writeln!(f, "{}", self.render_tree)?;

        writeln!(f, "Timings:")?;
        writeln!(f, "{}", self.timings)
    }
}

fn fetch_url(
method: &str,
url: &str,
headers: Headers,
cookies: CookieJar,
) -> Result<FetchResponse> {
let mut http_req = Request::new(method, url, "HTTP/1.1");
http_req.headers = headers.clone();
http_req.cookies = cookies.clone();

let parts = Url::parse(url)?;

let mut fetch_response = FetchResponse {
request: http_req,
response: Response::new(),
document: DocumentBuilder::new_document(),
parse_errors: vec![],
render_tree: String::new(),
timings: TimingTable::default(),
};

// For now, we do a DNS lookup here. We don't use this information yet, but it allows us to
// measure the DNS lookup time.
fetch_response.timings.start(Timing::DnsLookup);

let mut resolver = crate::dns::Dns::new();
let Some(hostname) = parts.host_str() else {
return Err(Error::Generic(format!("invalid hostname: {}", url)));
};
let _ = resolver.resolve(hostname, ResolveType::Ipv4)?;

fetch_response.timings.end(Timing::DnsLookup);

// Fetch the HTML document from the site
fetch_response.timings.start(Timing::ContentTransfer);

let agent = ureq::agent();
let mut req = agent.request(method, url).set("User-Agent", USER_AGENT);
for (key, value) in headers.sorted() {
req = req.set(key, value);
}

match req.call() {
Ok(resp) => {
fetch_response.response = Response::new();
fetch_response.response.status = resp.status();
fetch_response.response.version = format!("{:?}", resp.http_version());
for key in &resp.headers_names() {
for value in resp.all(key) {
fetch_response.response.headers.set(key.as_str(), value);
}
}
// TODO: cookies
// for cookie in resp.cookies() {
// fetch_response.response.cookies.insert(cookie.name().to_string(), cookie.value().to_string());
// }

let len = if let Some(header) = resp.header("Content-Length") {
header.parse::<usize>().unwrap_or_default()
} else {
MAX_BYTES as usize
};

let mut bytes: Vec<u8> = Vec::with_capacity(len);
resp.into_reader().take(MAX_BYTES).read_to_end(&mut bytes)?;
fetch_response.response.body = bytes;
}
Err(e) => {
return Err(Error::Generic(format!("Failed to fetch URL: {}", e)));
}
}
fetch_response.timings.end(Timing::ContentTransfer);

println!("resp: {:?}", fetch_response.response);

fetch_response.timings.start(Timing::HtmlParse);

let mut chars = CharIterator::new();
let _ = chars.read_from_bytes(&fetch_response.response.body, Some(Encoding::UTF8));
chars.set_confidence(Confidence::Certain);
fetch_response.document = DocumentBuilder::new_document();

match Html5Parser::parse_document(&mut chars, Document::clone(&fetch_response.document), None) {
Ok(parse_errors) => {
fetch_response.parse_errors = parse_errors;
}
Err(e) => {
return Err(Error::Generic(format!("Failed to parse HTML: {}", e)));
}
}
fetch_response.timings.end(Timing::HtmlParse);

Ok(fetch_response)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: fetches the gosub homepage (this performs a real
    /// request through `fetch_url`) and dumps the result.
    #[test]
    fn test_fetch_url() {
        let mut request_headers = Headers::new();
        request_headers.set("User-Agent", USER_AGENT);

        let outcome = fetch_url(
            "GET",
            "https://gosub.io/",
            request_headers,
            CookieJar::new(),
        );
        assert!(outcome.is_ok());

        let fetched = outcome.unwrap();
        print!("{:?}", fetched);
    }
}
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,10 @@ pub mod types;
pub mod config;
#[allow(dead_code)]
mod dns;

#[allow(dead_code)]
mod engine;
#[allow(dead_code)]
mod net;
#[allow(dead_code)]
mod timing;
2 changes: 2 additions & 0 deletions src/net.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod errors;
pub mod http;
8 changes: 8 additions & 0 deletions src/net/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//! Error results that can be returned from the engine
use thiserror::Error;

/// Errors produced by the `net` module.
#[derive(Error, Debug)]
pub enum Error {
/// Catch-all error carrying a free-form message; it is rendered as
/// `net: generic error: <message>`.
#[error("net: generic error: {0}")]
Generic(String),
}
3 changes: 3 additions & 0 deletions src/net/http.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod headers;
pub mod request;
pub mod response;
50 changes: 50 additions & 0 deletions src/net/http/headers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use std::collections::HashMap;

/// A set of HTTP header fields, holding a single value per field name.
///
/// HTTP field names are case-insensitive, so `Content-Type` and `content-type`
/// address the same entry. The casing used by the most recent [`Headers::set`]
/// call is the one that is stored and reported by [`Headers::all`] and
/// [`Headers::sorted`].
#[derive(Default, Debug, Clone)]
pub struct Headers {
    /// Header values keyed by field name (original casing preserved).
    headers: HashMap<String, String>,
}

impl Headers {
    /// Creates an empty header collection.
    pub fn new() -> Headers {
        Headers {
            headers: HashMap::new(),
        }
    }

    /// Sets the header `key` to `value`, replacing any existing value for that
    /// field name regardless of ASCII case.
    pub fn set(&mut self, key: &str, value: &str) {
        // Drop a previously stored spelling of the same field name so that a
        // header can never be present twice under different casings.
        self.headers
            .retain(|name, _| !name.eq_ignore_ascii_case(key));
        self.headers.insert(key.to_string(), value.to_string());
    }

    /// Returns the value stored for `key`, or `None` when the header is absent.
    ///
    /// The lookup ignores ASCII case in the field name.
    pub fn get(&self, key: &str) -> Option<&String> {
        // Fast path: exact spelling. Otherwise fall back to a case-insensitive scan.
        self.headers.get(key).or_else(|| {
            self.headers
                .iter()
                .find(|(name, _)| name.eq_ignore_ascii_case(key))
                .map(|(_, value)| value)
        })
    }

    /// Returns all the header entries. Note that there is no ordering in here!
    pub fn all(&self) -> &HashMap<String, String> {
        &self.headers
    }

    /// Returns all header entries as `(name, value)` pairs ordered by field name
    /// (plain byte-wise string ordering).
    pub fn sorted(&self) -> Vec<(&String, &String)> {
        let mut sorted = self.headers.iter().collect::<Vec<_>>();
        sorted.sort_by(|a, b| a.0.cmp(b.0));
        sorted
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Setting the same header twice must overwrite the value, not add a second entry.
    #[test]
    fn test_headers() {
        let mut header_set = Headers::new();

        // First write
        header_set.set("Content-Type", "application/json");
        let first = header_set.get("Content-Type").unwrap();
        assert_eq!(first, "application/json");

        // Second write to the same name replaces the value
        header_set.set("Content-Type", "text/html");
        let second = header_set.get("Content-Type").unwrap();
        assert_eq!(second, "text/html");
        assert_eq!(header_set.all().len(), 1);
    }
}
93 changes: 93 additions & 0 deletions src/net/http/request.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use crate::net::http::headers::Headers;
use cookie::CookieJar;
use core::fmt::{Display, Formatter};

/// An HTTP request: the request-line parts plus headers, cookies and body.
pub struct Request {
/// HTTP method (e.g. `GET`)
pub method: String,
/// URI of the request, stored exactly as passed to `Request::new`
pub uri: String,
/// HTTP version string (e.g. `HTTP/1.1`)
pub version: String,
/// Request headers
pub headers: Headers,
/// Cookies attached to the request
pub cookies: CookieJar,
/// Raw request body
pub body: Vec<u8>,
}

impl Request {
    /// Builds a request for the given method, URI and HTTP version with
    /// default (empty) headers, an empty cookie jar and an empty body.
    pub(crate) fn new(method: &str, uri: &str, version: &str) -> Self {
        let method = String::from(method);
        let uri = String::from(uri);
        let version = String::from(version);

        Self {
            method,
            uri,
            version,
            headers: Headers::default(),
            cookies: CookieJar::default(),
            body: Vec::new(),
        }
    }

    /// Replaces all headers of this request with `headers`.
    pub fn headers(&mut self, headers: Headers) {
        self.headers = headers;
    }

    /// Replaces the cookie jar of this request with `cookies`.
    pub fn cookies(&mut self, cookies: CookieJar) {
        self.cookies = cookies;
    }
}

/// Textual dump of a request: the request line, then headers and cookies
/// (both sorted by name so the output is deterministic), then the body size.
impl Display for Request {
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        writeln!(f, "{} {} {}", self.method, self.uri, self.version)?;

        writeln!(f, "Headers:")?;
        self.headers
            .sorted()
            .into_iter()
            .try_for_each(|(name, content)| writeln!(f, " {}: {}", name, content))?;

        writeln!(f, "Cookies:")?;
        let mut by_name: Vec<_> = self.cookies.iter().collect();
        by_name.sort_by(|left, right| left.name().cmp(right.name()));
        by_name
            .into_iter()
            .try_for_each(|entry| writeln!(f, " {}", entry))?;

        writeln!(f, "Body: {} bytes", self.body.len())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use cookie::Cookie;

    /// The fields given to the constructor and the entries added afterwards
    /// must all be visible on the request.
    #[test]
    fn test_request() {
        let mut request = Request::new("GET", "/", "HTTP/1.1");
        request.headers(Headers::new());
        request.cookies(CookieJar::new());

        request.headers.set("Content-Type", "application/json");
        request.cookies.add(Cookie::new("qux", "wok"));
        request.cookies.add(Cookie::new("foo", "bar"));
        request.headers.set("Accept", "text/html");
        request.headers.set("Accept-Encoding", "gzip, deflate, br");

        // Request line
        assert_eq!(request.method, "GET");
        assert_eq!(request.uri, "/");
        assert_eq!(request.version, "HTTP/1.1");

        // Three distinct headers, two distinct cookies
        let header_count = request.headers.all().len();
        let cookie_count = request.cookies.iter().count();
        assert_eq!(header_count, 3);
        assert_eq!(cookie_count, 2);
    }

    /// The `Display` output lists headers and cookies sorted by name.
    #[test]
    fn test_request_display() {
        let mut request = Request::new("GET", "/", "HTTP/1.1");
        request.headers(Headers::new());
        request.cookies(CookieJar::new());

        request.cookies.add(Cookie::new("foo", "bar"));
        request.cookies.add(Cookie::new("qux", "wok"));
        request.headers.set("Content-Type", "application/json");
        request.headers.set("Accept", "text/html");
        request.headers.set("Accept-Encoding", "gzip, deflate, br");

        let expected = "GET / HTTP/1.1\nHeaders:\n Accept: text/html\n Accept-Encoding: gzip, deflate, br\n Content-Type: application/json\nCookies:\n foo=bar\n qux=wok\nBody: 0 bytes\n";
        let rendered = request.to_string();
        assert_eq!(rendered, expected);
    }
}
Loading

0 comments on commit 37f5c5c

Please sign in to comment.