From bcdaeebc20f50369872fad7854e528d165f1cba3 Mon Sep 17 00:00:00 2001 From: Nikolai Schimke Date: Thu, 1 Feb 2024 00:26:46 +0000 Subject: [PATCH 1/3] adding space optimisation and rustfmt config --- Cargo.toml | 5 +++++ rustfmt.toml | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 rustfmt.toml diff --git a/Cargo.toml b/Cargo.toml index e6a6386..3a3aaf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,3 +33,8 @@ harness = false [[bench]] name = "browser" harness = false + +[profile.release] +strip = true +opt-level = "z" +lto = true \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..8c51a7b --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,17 @@ +array_width = 50 +blank_lines_upper_bound = 2 +chain_width = 40 +combine_control_expr = false +edition = "2021" +fn_single_line = true +force_multiline_blocks = true +format_code_in_doc_comments = true +format_generated_files = false +format_strings = true +imports_layout = "HorizontalVertical" +indent_style = "Visual" +match_arm_leading_pipes = "Always" +imports_granularity = "Crate" +group_imports = "StdExternalCrate" +struct_lit_single_line = false +use_field_init_shorthand = true \ No newline at end of file From 39ea169ac5871b11c6d25c30c2f44060cd1faf4a Mon Sep 17 00:00:00 2001 From: Nikolai Schimke Date: Thu, 1 Feb 2024 00:27:32 +0000 Subject: [PATCH 2/3] new cargo fmt layout --- benches/browser.rs | 133 +++++++++++++---------------- benches/db.rs | 32 +++---- src/bin/sportshub.rs | 12 +-- src/db/helpers.rs | 47 +++++------ src/db/models.rs | 85 +++++++++---------- src/query_selectors.rs | 116 ++++++++++++------------- src/scrape_utils.rs | 183 +++++++++++++++++++++------------------- src/web_server_utils.rs | 23 +++-- 8 files changed, 297 insertions(+), 334 deletions(-) diff --git a/benches/browser.rs b/benches/browser.rs index 086a406..a89ad50 100644 --- a/benches/browser.rs +++ b/benches/browser.rs @@ -5,117 +5,102 @@ use headless_chrome::Browser; fn create_connection() { let browser = Browser::new({ - headless_chrome::LaunchOptions { - headless: false, - sandbox: false, - ignore_certificate_errors: true, - ..Default::default() - } + headless_chrome::LaunchOptions { headless: false, + sandbox: false, + ignore_certificate_errors: true, + ..Default::default() } }); - browser - .unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser.unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_headless() { let browser = Browser::new({ - headless_chrome::LaunchOptions { - headless: true, - sandbox: false, - ignore_certificate_errors: true, - ..Default::default() - } + headless_chrome::LaunchOptions { headless: true, + sandbox: false, + ignore_certificate_errors: true, + ..Default::default() } }); - browser - .unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser.unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_with_sandbox() { let browser = Browser::new({ - headless_chrome::LaunchOptions { - headless: false, - sandbox: true, - ignore_certificate_errors: true, - ..Default::default() - } + headless_chrome::LaunchOptions { headless: false, + sandbox: true, + ignore_certificate_errors: true, + ..Default::default() } }); - browser - .unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser.unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_with_extensions() { let browser = Browser::new({ - headless_chrome::LaunchOptions { - headless: false, - sandbox: true, - ignore_certificate_errors: true, - extensions: vec![OsStr::new("./chrome-ext/adblock")], - ..Default::default() - } + headless_chrome::LaunchOptions { headless: false, + sandbox: true, + ignore_certificate_errors: true, + extensions: vec![OsStr::new("./chrome-ext/adblock")], + ..Default::default() } }); - browser - .unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser.unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("browser_open"); // Configure Criterion.rs to detect smaller differences and increase sample size to improve // precision and counteract the resulting noise. - group - .significance_level(0.1) - .sample_size(10) - .measurement_time(std::time::Duration::from_secs(20)); + group.significance_level(0.1) + .sample_size(10) + .measurement_time(std::time::Duration::from_secs(20)); // 1.54 seconds - group.bench_function("Create new browser basic", |b| { - b.iter(create_connection) - }); + group.bench_function("Create new browser basic", |b| b.iter(create_connection)); // 0.943 seconds group.bench_function("Create new browser headless", |b| { - b.iter(create_connection_headless) - }); + b.iter(create_connection_headless) + }); // 1.48 seconds group.bench_function("Create new browser sandbox", |b| { - b.iter(create_connection_with_sandbox) - }); + b.iter(create_connection_with_sandbox) + }); // 1.69 seconds group.bench_function("Create new browser adblock ext", |b| { - b.iter(create_connection_with_extensions) - }); + b.iter(create_connection_with_extensions) + }); group.finish(); } diff --git a/benches/db.rs b/benches/db.rs index 67e6d61..f9c8fb5 100644 --- a/benches/db.rs +++ b/benches/db.rs @@ -1,9 +1,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; use scraper::db; -fn create_connection() { - db::helpers::establish_connection().unwrap(); -} +fn create_connection() { db::helpers::establish_connection().unwrap(); } fn criterion_benchmark(c: &mut Criterion) { // 43.6 nano seconds @@ -11,24 +9,22 @@ fn criterion_benchmark(c: &mut Criterion) { // 228.3 nano seconds c.bench_function("Create a new record", |b| { - let mut conn = db::helpers::establish_connection().unwrap(); - let new_stream = db::models::StreamNew { - away: "Away", - home: "Home", - league: "League", - country: "Country", - start_time: "Start Time", - url: "Url", - stream_link: "https://www.test.com", - }; - b.iter(|| db::helpers::create_stream(&mut conn, &new_stream)) - }); + let mut conn = db::helpers::establish_connection().unwrap(); + let new_stream = db::models::StreamNew { away: "Away", + home: "Home", + league: "League", + country: "Country", + start_time: "Start Time", + url: "Url", + stream_link: "https://www.test.com" }; + b.iter(|| db::helpers::create_stream(&mut conn, &new_stream)) + }); // 356 nano seconds c.bench_function("Get all streams", |b| { - let mut conn = db::helpers::establish_connection().unwrap(); - b.iter(|| db::helpers::get_streams(&mut conn)) - }); + let mut conn = db::helpers::establish_connection().unwrap(); + b.iter(|| db::helpers::get_streams(&mut conn)) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/src/bin/sportshub.rs b/src/bin/sportshub.rs index b11697b..91320f7 100644 --- a/src/bin/sportshub.rs +++ b/src/bin/sportshub.rs @@ -7,9 +7,11 @@ use scraper::{db, scrape_utils, web_server_utils}; pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations"); fn run_migrations(connection: &mut impl MigrationHarness) -> Result<(), Error> { - connection.revert_all_migrations(MIGRATIONS).unwrap(); + connection.revert_all_migrations(MIGRATIONS) + .unwrap(); println!("Reverted all migrations"); - connection.run_pending_migrations(MIGRATIONS).unwrap(); + connection.run_pending_migrations(MIGRATIONS) + .unwrap(); Ok(()) } @@ -43,14 +45,14 @@ async fn main() { let cli = Cli::parse(); match cli.command { - Some(Commands::Parse { tabs }) => { + | Some(Commands::Parse { tabs }) => { run_migrations(&mut conn).unwrap(); scrape_utils::start_scraping(tabs).unwrap(); } - Some(Commands::Server { port }) => { + | Some(Commands::Server { port }) => { web_server_utils::run(port).await; } - None => { + | None => { println!("use sportshub -h for help"); } } diff --git a/src/db/helpers.rs b/src/db/helpers.rs index 9ddad09..72dd6c3 100644 --- a/src/db/helpers.rs +++ b/src/db/helpers.rs @@ -1,13 +1,12 @@ //! Database operation helpers for sqlite, using diesel -use diesel::prelude::*; +use diesel::{prelude::*, RunQueryDsl}; -use diesel::RunQueryDsl; - -use super::models::{Stream, StreamNew}; -use super::schema; -use super::schema::stream; -use super::schema::stream::dsl::*; +use super::{ + models::{Stream, StreamNew}, + schema, + schema::{stream, stream::dsl::*}, +}; pub fn establish_connection() -> Result { let database_url = format!("{}/sports.db", std::env::temp_dir().display()); @@ -15,13 +14,11 @@ pub fn establish_connection() -> Result { Ok(SqliteConnection::establish(&database_url)?) } -pub fn create_stream( - conn: &mut SqliteConnection, - new_stream: &StreamNew, -) -> Result { - Ok(diesel::insert_or_ignore_into(stream::table) - .values(new_stream) - .execute(conn)?) +pub fn create_stream(conn: &mut SqliteConnection, + new_stream: &StreamNew) + -> Result { + Ok(diesel::insert_or_ignore_into(stream::table).values(new_stream) + .execute(conn)?) } pub fn get_streams(conn: &mut SqliteConnection) -> Result, anyhow::Error> { @@ -29,22 +26,18 @@ pub fn get_streams(conn: &mut SqliteConnection) -> Result, anyhow::E } pub fn get_empty_streams(conn: &mut SqliteConnection) -> Result, anyhow::Error> { - Ok(stream - .filter(schema::stream::stream_link.eq("")) - .load::(conn)?) + Ok(stream.filter(schema::stream::stream_link.eq("")) + .load::(conn)?) } pub fn get_linked_streams(conn: &mut SqliteConnection) -> Result, anyhow::Error> { - Ok(stream - .filter(schema::stream::stream_link.ne("")) - .load::(conn)?) + Ok(stream.filter(schema::stream::stream_link.ne("")) + .load::(conn)?) } -pub fn get_streams_by_id( - conn: &mut SqliteConnection, - search_id: i32, -) -> Result, anyhow::Error> { - Ok(stream - .filter(schema::stream::id.eq(search_id)) - .load::(conn)?) +pub fn get_streams_by_id(conn: &mut SqliteConnection, + search_id: i32) + -> Result, anyhow::Error> { + Ok(stream.filter(schema::stream::id.eq(search_id)) + .load::(conn)?) } diff --git a/src/db/models.rs b/src/db/models.rs index a5b8331..a2e70bf 100644 --- a/src/db/models.rs +++ b/src/db/models.rs @@ -1,9 +1,7 @@ //! This module contains the models for the diesel ORM use diesel::prelude::*; -use serde::ser::SerializeStruct; -use serde::Deserialize; -use serde::Serialize; +use serde::{ser::SerializeStruct, Deserialize, Serialize}; #[derive(Debug, Queryable, Deserialize, Clone)] pub struct Stream { @@ -31,8 +29,7 @@ pub struct StreamNew<'a> { impl Serialize for Stream { fn serialize(&self, serializer: S) -> Result - where - S: serde::ser::Serializer, + where S: serde::ser::Serializer { let split_streams: Vec<&str> = self.stream_link.split(',').collect(); let mut stream = serializer.serialize_struct("Stream", 8)?; @@ -53,61 +50,55 @@ mod tests { #[test] fn test_serialise_streamlink() { - let stream = Stream { - id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "stream_link".to_string(), - }; + let stream = Stream { id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "stream_link".to_string() }; let serialised = serde_json::to_string(&stream).unwrap(); - assert_eq!( - serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"stream_link\"]}" - ); + assert_eq!(serialised, + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ + "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ + [\"stream_link\"]}"); } #[test] fn test_serialise_streamlink_multiple() { - let stream = Stream { - id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "stream_link,stream_link2".to_string(), - }; + let stream = Stream { id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "stream_link,stream_link2".to_string() }; let serialised = serde_json::to_string(&stream).unwrap(); - assert_eq!( - serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"stream_link\",\"stream_link2\"]}" - ); + assert_eq!(serialised, + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ + "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ + [\"stream_link\",\"stream_link2\"]}"); } #[test] fn test_serialise_streamlink_empty() { - let stream = Stream { - id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "".to_string(), - }; + let stream = Stream { id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "".to_string() }; let serialised = serde_json::to_string(&stream).unwrap(); - assert_eq!( - serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"\"]}" - ); + assert_eq!(serialised, + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ + "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ + [\"\"]}"); } } diff --git a/src/query_selectors.rs b/src/query_selectors.rs index 1cc75e0..7166958 100644 --- a/src/query_selectors.rs +++ b/src/query_selectors.rs @@ -1,10 +1,8 @@ //! This module contains the functions to get the data from the dom of the eventlist //! and return the data as a string -use tl::Parser; -use tl::VDom; - use thiserror::Error; +use tl::{Parser, VDom}; #[derive(Error, Debug)] pub enum DomParseError { @@ -37,24 +35,24 @@ pub enum DomParseError { /// let parser = dom.parser(); /// /// let link = get_url_from_dom(&dom, &parser).unwrap(); -/// assert_eq!(link, "https://sportshub.fan/event/ypiranga_rs_novo_hamburgo_191503337/"); +/// assert_eq!(link, +/// "https://sportshub.fan/event/ypiranga_rs_novo_hamburgo_191503337/"); /// ``` pub fn get_url_from_dom(dom: &VDom<'_>, parser: &Parser<'_>) -> Result { - let q = dom - .query_selector("a") - .ok_or(DomParseError::NotFound)? - .next() - .ok_or(DomParseError::NotFound)? - .get(parser) - .ok_or(DomParseError::Unknown)? - .as_tag() - .ok_or(DomParseError::NoAttributeFound)? - .attributes() - .get("href") - .ok_or(DomParseError::NoAttributeFound)? - .ok_or(DomParseError::NoAttributeFound)? - .as_utf8_str() - .to_string(); + let q = dom.query_selector("a") + .ok_or(DomParseError::NotFound)? + .next() + .ok_or(DomParseError::NotFound)? + .get(parser) + .ok_or(DomParseError::Unknown)? + .as_tag() + .ok_or(DomParseError::NoAttributeFound)? + .attributes() + .get("href") + .ok_or(DomParseError::NoAttributeFound)? + .ok_or(DomParseError::NoAttributeFound)? + .as_utf8_str() + .to_string(); Ok(q) } @@ -81,19 +79,17 @@ pub fn get_url_from_dom(dom: &VDom<'_>, parser: &Parser<'_>) -> Result, - parser: &Parser<'_>, -) -> Result { - let q = dom - .query_selector("span.mr-5") - .ok_or(DomParseError::NotFound)? - .next() - .ok_or(DomParseError::NotFound)? - .get(parser) - .ok_or(DomParseError::Unknown)? - .inner_text(parser) - .to_string(); +pub fn get_game_name_from_dom(dom: &VDom<'_>, + parser: &Parser<'_>) + -> Result { + let q = dom.query_selector("span.mr-5") + .ok_or(DomParseError::NotFound)? + .next() + .ok_or(DomParseError::NotFound)? + .get(parser) + .ok_or(DomParseError::Unknown)? + .inner_text(parser) + .to_string(); Ok(q) } @@ -121,15 +117,14 @@ pub fn get_game_name_from_dom( /// let event_info = get_info_from_dom(&dom, &parser).unwrap(); /// assert_eq!(event_info, "Brazilian Campeonato Gaucho"); pub fn get_info_from_dom(dom: &VDom<'_>, parser: &Parser<'_>) -> Result { - let q = dom - .query_selector("span.evdesc.event-desc") - .ok_or(DomParseError::NotFound)? - .next() - .ok_or(DomParseError::NotFound)? - .get(parser) - .ok_or(DomParseError::Unknown)? - .inner_text(parser) - .to_string(); + let q = dom.query_selector("span.evdesc.event-desc") + .ok_or(DomParseError::NotFound)? + .next() + .ok_or(DomParseError::NotFound)? + .get(parser) + .ok_or(DomParseError::Unknown)? + .inner_text(parser) + .to_string(); Ok(q) } @@ -155,26 +150,25 @@ pub fn get_info_from_dom(dom: &VDom<'_>, parser: &Parser<'_>) -> Result, parser: &Parser<'_>) -> Result { - let q = dom - .query_selector("i.icon-competitions") - .ok_or(DomParseError::NotFound)? - .next() - .ok_or(DomParseError::NotFound)? - .get(parser) - .ok_or(DomParseError::Unknown)? - .as_tag() - .ok_or(DomParseError::NoAttributeFound)? - .attributes() - .get("style") - .ok_or(DomParseError::NoAttributeFound)? - .ok_or(DomParseError::NoAttributeFound)? - .as_utf8_str() - .split('/') - .last() - .ok_or(DomParseError::NoAttributeFound)? - .replace(");", "") - .replace(".svg", "") - .to_string(); + let q = dom.query_selector("i.icon-competitions") + .ok_or(DomParseError::NotFound)? + .next() + .ok_or(DomParseError::NotFound)? + .get(parser) + .ok_or(DomParseError::Unknown)? + .as_tag() + .ok_or(DomParseError::NoAttributeFound)? + .attributes() + .get("style") + .ok_or(DomParseError::NoAttributeFound)? + .ok_or(DomParseError::NoAttributeFound)? + .as_utf8_str() + .split('/') + .last() + .ok_or(DomParseError::NoAttributeFound)? + .replace(");", "") + .replace(".svg", "") + .to_string(); Ok(q) } diff --git a/src/scrape_utils.rs b/src/scrape_utils.rs index 0b814b0..7e5d156 100644 --- a/src/scrape_utils.rs +++ b/src/scrape_utils.rs @@ -2,23 +2,18 @@ //! and save them to database. It also checks the stream links and saves them //! to database. -use std::borrow::BorrowMut; - -use std::sync::Arc; -use std::sync::Mutex; -use std::thread; +use std::{ + borrow::BorrowMut, + sync::{Arc, Mutex}, + thread, +}; use anyhow::anyhow; -use diesel::ExpressionMethods; -use diesel::RunQueryDsl; -use diesel::SqliteConnection; -use headless_chrome::Browser; -use headless_chrome::Tab; +use db::{models, schema}; +use diesel::{ExpressionMethods, RunQueryDsl, SqliteConnection}; +use headless_chrome::{Browser, Tab}; -use crate::db; -use crate::query_selectors; -use db::models; -use db::schema; +use crate::{db, query_selectors}; /// This function scrapes all the games from the home page and saves them to database. /// It takes roughly 1 second to scrape ~500 games. @@ -30,13 +25,12 @@ use db::schema; pub fn today_games(tab: &Tab, conn: &mut SqliteConnection) -> Result<(), anyhow::Error> { // we navigate to the page and wait until the table showing links is loaded tab.navigate_to("https://reddit.sportshub.fan/")? - .wait_for_element(".list-events")?; + .wait_for_element(".list-events")?; // we get the html of the table and remove all the tabs and newlines - let html = tab - .find_element(".list-events")? - .get_content()? - .replace(['\t', '\n'], ""); + let html = tab.find_element(".list-events")? + .get_content()? + .replace(['\t', '\n'], ""); // create the parser using tl let dom = tl::parse(&html, tl::ParserOptions::default())?; @@ -78,8 +72,14 @@ pub fn parse_game(conn: &mut SqliteConnection, html: &str) -> Result<(), anyhow: let name = query_selectors::get_game_name_from_dom(&dom, parser)?; let teams: Vec<&str> = name.split('–').collect(); - let home = teams.first().unwrap_or(&"???").trim().to_string(); - let away = teams.last().unwrap_or(&"???").trim().to_string(); + let home = teams.first() + .unwrap_or(&"???") + .trim() + .to_string(); + let away = teams.last() + .unwrap_or(&"???") + .trim() + .to_string(); // we get the info of the game, such as time, league, country // format is: League / Time @@ -87,8 +87,12 @@ pub fn parse_game(conn: &mut SqliteConnection, html: &str) -> Result<(), anyhow: // we split the info into league and time let mut info_parsed = info.split('/'); - let league = &info_parsed.next().unwrap_or("Unknown").to_string(); - let time = info_parsed.next().unwrap_or("Unknown").to_string(); + let league = &info_parsed.next() + .unwrap_or("Unknown") + .to_string(); + let time = info_parsed.next() + .unwrap_or("Unknown") + .to_string(); // we get the country of the game // format is: @@ -96,15 +100,13 @@ pub fn parse_game(conn: &mut SqliteConnection, html: &str) -> Result<(), anyhow: // we create a new stream and save it to database // we leave stream_link empty for now - let new_stream = models::StreamNew { - home: &home, - away: &away, - start_time: &time, - league, - country: &country, - url: &url, - stream_link: "", - }; + let new_stream = models::StreamNew { home: &home, + away: &away, + start_time: &time, + league, + country: &country, + url: &url, + stream_link: "" }; db::helpers::create_stream(conn, &new_stream)?; @@ -121,12 +123,12 @@ pub fn parse_game(conn: &mut SqliteConnection, html: &str) -> Result<(), anyhow: /// *tab* - is the tab that we use to navigate to the page and scrape the links, we use headless_chrome tabs. /// *conn* - is the connection to the database, we use diesel to save the links to database. /// *url* - is the url of the game page that we get from database. -pub fn url_to_links( - tab: &Tab, - conn: &mut SqliteConnection, - url: &str, -) -> Result<(), anyhow::Error> { - tab.navigate_to(url)?.wait_for_element("#content-event")?; +pub fn url_to_links(tab: &Tab, + conn: &mut SqliteConnection, + url: &str) + -> Result<(), anyhow::Error> { + tab.navigate_to(url)? + .wait_for_element("#content-event")?; // they encode url, so we need to decode it let u = urlencoding::decode(url).unwrap(); @@ -145,19 +147,24 @@ pub fn url_to_links( // we get the links from the elements // checking if they have "//" in them because some of them are just text - let stream_links: Vec = elements - .unwrap() - .into_iter() - .map(|e| e.get_attributes().unwrap().unwrap().get(1).unwrap().clone()) - .collect(); + let stream_links: Vec = elements.unwrap() + .into_iter() + .map(|e| { + e.get_attributes() + .unwrap() + .unwrap() + .get(1) + .unwrap() + .clone() + }) + .collect(); let joined_links = stream_links.join(","); // we save the links to database - diesel::update(schema::stream::table) - .set(schema::stream::stream_link.eq(joined_links)) - .filter(schema::stream::url.eq(u)) - .execute(conn)?; + diesel::update(schema::stream::table).set(schema::stream::stream_link.eq(joined_links)) + .filter(schema::stream::url.eq(u)) + .execute(conn)?; Ok(()) } @@ -166,18 +173,18 @@ pub fn url_to_links( /// It takes roughly 27 seconds to check all the links. /// (My 8gb ram m1 macbook air with a 90mbps internet connection can handle 10 tabs relatively easily) /// It can be improved by using a shared queue instead of splitting it. -pub fn check_all_links( - browser: &Browser, - conn: &mut SqliteConnection, - tabs_count: usize, -) -> Result<(), anyhow::Error> { +pub fn check_all_links(browser: &Browser, + conn: &mut SqliteConnection, + tabs_count: usize) + -> Result<(), anyhow::Error> { // we get all the streams from database that have no links // wrap it in an arc to share it between threads let all_streams = Arc::new(db::helpers::get_empty_streams(conn)?); // we split the streams into chunks and create a thread for each chunk - let chunked_streams: Vec<&[models::Stream]> = - all_streams.chunks(all_streams.len() / tabs_count).collect(); + let chunked_streams: Vec<&[models::Stream]> = all_streams.chunks(all_streams.len() + / tabs_count) + .collect(); let length = all_streams.len(); @@ -194,29 +201,28 @@ pub fn check_all_links( tabs.push(tab.clone()); // we get the streams from the chunked streams and turn it to a vec - let mut streams = chunked_streams - .get(tab_num) - .ok_or(anyhow!("invalid chunked_stream index"))? - .to_vec() - .clone(); + let mut streams = chunked_streams.get(tab_num) + .ok_or(anyhow!("invalid chunked_stream index"))? + .to_vec() + .clone(); let completed = completed_mutex.clone(); threads.push(thread::spawn(move || { - // sqlite should be able to handle 10 connections at once - let mut conn = db::helpers::establish_connection().unwrap(); - - // we iterate over all the streams and check them - while let Some(stream) = streams.pop() { - check_link(tab.clone().borrow_mut(), &mut conn, &stream.url).unwrap(); - // we print the progress - let mut completed_count = completed - .lock() - .expect("mutex is already opened by current thread"); - *completed_count += 1; - println!("{} / {}", completed_count, length); - } - })); + // sqlite should be able to handle 10 connections at once + let mut conn = db::helpers::establish_connection().unwrap(); + + // we iterate over all the streams and check them + while let Some(stream) = streams.pop() { + check_link(tab.clone().borrow_mut(), &mut conn, &stream.url).unwrap(); + // we print the progress + let mut completed_count = + completed.lock() + .expect("mutex is already opened by current thread"); + *completed_count += 1; + println!("{} / {}", completed_count, length); + } + })); } // we wait for all the threads to finish @@ -228,19 +234,16 @@ pub fn check_all_links( let time_end = std::time::Instant::now(); - println!( - "Time elapsed to scan all games: {:?}", - time_end - time_start - ); + println!("Time elapsed to scan all games: {:?}", + time_end - time_start); Ok(()) } -pub fn check_link( - tab: &mut Arc, - conn: &mut SqliteConnection, - link: &str, -) -> Result<(), anyhow::Error> { +pub fn check_link(tab: &mut Arc, + conn: &mut SqliteConnection, + link: &str) + -> Result<(), anyhow::Error> { url_to_links(tab.borrow_mut(), conn.borrow_mut(), link).unwrap(); Ok(()) @@ -249,12 +252,10 @@ pub fn check_link( pub fn start_scraping(open_tabs: usize) -> Result<(), anyhow::Error> { // realised we didnt need adblocker when headless let browser = Browser::new({ - headless_chrome::LaunchOptions { - headless: true, - sandbox: true, - ignore_certificate_errors: true, - ..Default::default() - } + headless_chrome::LaunchOptions { headless: true, + sandbox: true, + ignore_certificate_errors: true, + ..Default::default() } })?; let mut conn = db::helpers::establish_connection()?; @@ -277,7 +278,11 @@ pub fn start_scraping(open_tabs: usize) -> Result<(), anyhow::Error> { // we close all the tabs because otherwise it shows an error when program // finishes - for t in (*browser.get_tabs().as_ref().lock().unwrap()).iter() { + for t in (*browser.get_tabs() + .as_ref() + .lock() + .unwrap()).iter() + { t.close(true)?; } diff --git a/src/web_server_utils.rs b/src/web_server_utils.rs index b6a7850..0f1871e 100644 --- a/src/web_server_utils.rs +++ b/src/web_server_utils.rs @@ -1,11 +1,11 @@ //! This module contains the web server for the API. //! It uses the rocket framework. -use crate::db; use db::models::Stream; - use rocket::{get, routes, serde::json::Json, Rocket}; +use crate::db; + #[get("/")] async fn get_all_streams() -> Json> { let mut conn = db::helpers::establish_connection().unwrap(); @@ -31,15 +31,12 @@ async fn get_stream_by_id(id: i32) -> Json> { } pub async fn run(port: u16) { - Rocket::custom(rocket::Config { - port, - ..Default::default() - }) - .mount( - "/", - routes![get_all_streams, get_active_streams, get_stream_by_id], - ) - .launch() - .await - .unwrap(); + Rocket::custom(rocket::Config { port, + ..Default::default() }).mount("/", + routes![get_all_streams, + get_active_streams, + get_stream_by_id]) + .launch() + .await + .unwrap(); } From 358bf1dd5a4667d9faca4798040698466b179a19 Mon Sep 17 00:00:00 2001 From: Nikolai Schimke Date: Thu, 1 Feb 2024 00:30:31 +0000 Subject: [PATCH 3/3] fixing rustfmt --- benches/browser.rs | 129 ++++++++++++++++++++++++------------------- benches/db.rs | 32 ++++++----- rustfmt.toml | 10 +--- src/bin/sportshub.rs | 12 ++-- src/db/models.rs | 69 +++++++++++------------ 5 files changed, 130 insertions(+), 122 deletions(-) diff --git a/benches/browser.rs b/benches/browser.rs index a89ad50..683950d 100644 --- a/benches/browser.rs +++ b/benches/browser.rs @@ -5,102 +5,115 @@ use headless_chrome::Browser; fn create_connection() { let browser = Browser::new({ - headless_chrome::LaunchOptions { headless: false, - sandbox: false, - ignore_certificate_errors: true, - ..Default::default() } + headless_chrome::LaunchOptions { + headless: false, + sandbox: false, + ignore_certificate_errors: true, + ..Default::default() + } }); - browser.unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser + .unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_headless() { let browser = Browser::new({ - headless_chrome::LaunchOptions { headless: true, - sandbox: false, - ignore_certificate_errors: true, - ..Default::default() } + headless_chrome::LaunchOptions { + headless: true, + sandbox: false, + ignore_certificate_errors: true, + ..Default::default() + } }); - browser.unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser + .unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_with_sandbox() { let browser = Browser::new({ - headless_chrome::LaunchOptions { headless: false, - sandbox: true, - ignore_certificate_errors: true, - ..Default::default() } + headless_chrome::LaunchOptions { + headless: false, + sandbox: true, + ignore_certificate_errors: true, + ..Default::default() + } }); - browser.unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser + .unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn create_connection_with_extensions() { let browser = Browser::new({ - headless_chrome::LaunchOptions { headless: false, - sandbox: true, - ignore_certificate_errors: true, - extensions: vec![OsStr::new("./chrome-ext/adblock")], - ..Default::default() } + headless_chrome::LaunchOptions { + headless: false, + sandbox: true, + ignore_certificate_errors: true, + extensions: vec![OsStr::new("./chrome-ext/adblock")], + ..Default::default() + } }); - browser.unwrap() - .get_tabs() - .lock() - .unwrap() - .iter() - .for_each(|e| { - e.close(false).unwrap(); - }) + browser + .unwrap() + .get_tabs() + .lock() + .unwrap() + .iter() + .for_each(|e| { + e.close(false).unwrap(); + }) } fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("browser_open"); // Configure Criterion.rs to detect smaller differences and increase sample size to improve // precision and counteract the resulting noise. - group.significance_level(0.1) - .sample_size(10) - .measurement_time(std::time::Duration::from_secs(20)); + group + .significance_level(0.1) + .sample_size(10) + .measurement_time(std::time::Duration::from_secs(20)); // 1.54 seconds group.bench_function("Create new browser basic", |b| b.iter(create_connection)); // 0.943 seconds group.bench_function("Create new browser headless", |b| { - b.iter(create_connection_headless) - }); + b.iter(create_connection_headless) + }); // 1.48 seconds group.bench_function("Create new browser sandbox", |b| { - b.iter(create_connection_with_sandbox) - }); + b.iter(create_connection_with_sandbox) + }); // 1.69 seconds group.bench_function("Create new browser adblock ext", |b| { - b.iter(create_connection_with_extensions) - }); + b.iter(create_connection_with_extensions) + }); group.finish(); } diff --git a/benches/db.rs b/benches/db.rs index f9c8fb5..67e6d61 100644 --- a/benches/db.rs +++ b/benches/db.rs @@ -1,7 +1,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; use scraper::db; -fn create_connection() { db::helpers::establish_connection().unwrap(); } +fn create_connection() { + db::helpers::establish_connection().unwrap(); +} fn criterion_benchmark(c: &mut Criterion) { // 43.6 nano seconds @@ -9,22 +11,24 @@ fn criterion_benchmark(c: &mut Criterion) { // 228.3 nano seconds c.bench_function("Create a new record", |b| { - let mut conn = db::helpers::establish_connection().unwrap(); - let new_stream = db::models::StreamNew { away: "Away", - home: "Home", - league: "League", - country: "Country", - start_time: "Start Time", - url: "Url", - stream_link: "https://www.test.com" }; - b.iter(|| db::helpers::create_stream(&mut conn, &new_stream)) - }); + let mut conn = db::helpers::establish_connection().unwrap(); + let new_stream = db::models::StreamNew { + away: "Away", + home: "Home", + league: "League", + country: "Country", + start_time: "Start Time", + url: "Url", + stream_link: "https://www.test.com", + }; + b.iter(|| db::helpers::create_stream(&mut conn, &new_stream)) + }); // 356 nano seconds c.bench_function("Get all streams", |b| { - let mut conn = db::helpers::establish_connection().unwrap(); - b.iter(|| db::helpers::get_streams(&mut conn)) - }); + let mut conn = db::helpers::establish_connection().unwrap(); + b.iter(|| db::helpers::get_streams(&mut conn)) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/rustfmt.toml b/rustfmt.toml index 8c51a7b..72dd056 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,17 +1,9 @@ -array_width = 50 blank_lines_upper_bound = 2 -chain_width = 40 combine_control_expr = false edition = "2021" -fn_single_line = true force_multiline_blocks = true format_code_in_doc_comments = true format_generated_files = false -format_strings = true imports_layout = "HorizontalVertical" -indent_style = "Visual" -match_arm_leading_pipes = "Always" imports_granularity = "Crate" -group_imports = "StdExternalCrate" -struct_lit_single_line = false -use_field_init_shorthand = true \ No newline at end of file +group_imports = "StdExternalCrate" \ No newline at end of file diff --git a/src/bin/sportshub.rs b/src/bin/sportshub.rs index 91320f7..b11697b 100644 --- a/src/bin/sportshub.rs +++ b/src/bin/sportshub.rs @@ -7,11 +7,9 @@ use scraper::{db, scrape_utils, web_server_utils}; pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations"); fn run_migrations(connection: &mut impl MigrationHarness) -> Result<(), Error> { - connection.revert_all_migrations(MIGRATIONS) - .unwrap(); + connection.revert_all_migrations(MIGRATIONS).unwrap(); println!("Reverted all migrations"); - connection.run_pending_migrations(MIGRATIONS) - .unwrap(); + connection.run_pending_migrations(MIGRATIONS).unwrap(); Ok(()) } @@ -45,14 +43,14 @@ async fn main() { let cli = Cli::parse(); match cli.command { - | Some(Commands::Parse { tabs }) => { + Some(Commands::Parse { tabs }) => { run_migrations(&mut conn).unwrap(); scrape_utils::start_scraping(tabs).unwrap(); } - | Some(Commands::Server { port }) => { + Some(Commands::Server { port }) => { web_server_utils::run(port).await; } - | None => { + None => { println!("use sportshub -h for help"); } } diff --git a/src/db/models.rs b/src/db/models.rs index a2e70bf..f566752 100644 --- a/src/db/models.rs +++ b/src/db/models.rs @@ -29,7 +29,8 @@ pub struct StreamNew<'a> { impl Serialize for Stream { fn serialize(&self, serializer: S) -> Result - where S: serde::ser::Serializer + where + S: serde::ser::Serializer, { let split_streams: Vec<&str> = self.stream_link.split(',').collect(); let mut stream = serializer.serialize_struct("Stream", 8)?; @@ -50,55 +51,55 @@ mod tests { #[test] fn test_serialise_streamlink() { - let stream = Stream { id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "stream_link".to_string() }; + let stream = Stream { + id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "stream_link".to_string(), + }; let serialised = serde_json::to_string(&stream).unwrap(); assert_eq!(serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ - "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ - [\"stream_link\"]}"); + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"stream_link\"]}"); } #[test] fn test_serialise_streamlink_multiple() { - let stream = Stream { id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "stream_link,stream_link2".to_string() }; + let stream = Stream { + id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "stream_link,stream_link2".to_string(), + }; let serialised = serde_json::to_string(&stream).unwrap(); assert_eq!(serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ - "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ - [\"stream_link\",\"stream_link2\"]}"); + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"stream_link\",\"stream_link2\"]}"); } #[test] fn test_serialise_streamlink_empty() { - let stream = Stream { id: Some(1), - home: "home".to_string(), - away: "away".to_string(), - start_time: "start_time".to_string(), - league: "league".to_string(), - country: "country".to_string(), - url: "url".to_string(), - stream_link: "".to_string() }; + let stream = Stream { + id: Some(1), + home: "home".to_string(), + away: "away".to_string(), + start_time: "start_time".to_string(), + league: "league".to_string(), + country: "country".to_string(), + url: "url".to_string(), + stream_link: "".to_string(), + }; let serialised = serde_json::to_string(&stream).unwrap(); assert_eq!(serialised, - "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\\ - "league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":\ - [\"\"]}"); + "{\"id\":1,\"home\":\"home\",\"away\":\"away\",\"start_time\":\"start_time\",\"league\":\"league\",\"country\":\"country\",\"url\":\"url\",\"stream_link\":[\"\"]}"); } }