Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] Implementation of re-wrapping logic #64

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
005ac6b
Change queue from `Vec` to `VecDeque`
cdesaintguilhem Dec 9, 2024
ac22dfd
Add label to main formatting loop
cdesaintguilhem Dec 9, 2024
47baf12
Increase the formatting queue to always have at least 2 lines
cdesaintguilhem Dec 9, 2024
bf1ab97
Save all potential wrap points in a `Vec`
cdesaintguilhem Dec 9, 2024
8fefe6f
Change `find_wrap_point()` to return list of possible wrap points
cdesaintguilhem Dec 9, 2024
4711900
Wrap long comments at 80 characters
cdesaintguilhem Dec 9, 2024
11a26b6
WIP: initial re-wrapping detection logic
cdesaintguilhem Dec 9, 2024
567153d
Move large test files to a separate directory
cdesaintguilhem Dec 9, 2024
656ee0f
Add initial re-wrapping test file
cdesaintguilhem Dec 9, 2024
3d8e1f6
WIP: Initial wrapping logic
cdesaintguilhem Dec 9, 2024
44fff9c
Add two-to-one re-wrap test case
cdesaintguilhem Dec 10, 2024
57e2f03
Add environment re-wrap test case
cdesaintguilhem Dec 10, 2024
6163289
Add idempotent re-wrap test case
cdesaintguilhem Dec 10, 2024
077e048
Make EOL a valid re-wrap position
cdesaintguilhem Dec 10, 2024
94e2aef
Don't re-wrap from or into splitting lines
cdesaintguilhem Dec 10, 2024
085d670
Add multi-line re-wrap test case
cdesaintguilhem Dec 10, 2024
14049f2
Add re-wrap test case for items
cdesaintguilhem Dec 11, 2024
c44b60f
Don't wrap lines into comments or from comments
cdesaintguilhem Dec 11, 2024
afedb95
Re-queue current line for further re-wrapping
cdesaintguilhem Dec 11, 2024
1241da6
Re-wrap onto lines containing items
cdesaintguilhem Dec 12, 2024
ccdbc42
Amend re-wrap test to check trimming
cdesaintguilhem Dec 12, 2024
f2df0b4
Fix trimming issue
cdesaintguilhem Dec 12, 2024
3267fa7
Create test for pattern detection
cdesaintguilhem Dec 16, 2024
01a0786
Refactor pattern creation
cdesaintguilhem Dec 16, 2024
d47147d
Move pattern detection to its own module
cdesaintguilhem Dec 17, 2024
b4549da
Add module documentation
cdesaintguilhem Dec 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 61 additions & 46 deletions src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,22 @@ use crate::args::*;
use crate::ignore::*;
use crate::indent::*;
use crate::logging::*;
use crate::pattern::Pattern;
use crate::read::*;
use crate::regexes::{ENV_BEGIN, ENV_END, ITEM, RE_SPLITTING};
use crate::subs::*;
use crate::verbatim::*;
use crate::wrap::*;
use crate::write::*;
use crate::LINE_END;
use log::Level::{Info, Warn};
use std::collections::VecDeque;
use std::iter::zip;

/// The default number of lines to have in the formatting queue.
///
/// Two lines are required for re-wrapping so that the line following the
/// current one is available in the queue (when the input still has one).
/// The main formatting loop tops the queue up to this length from the
/// remaining input at the start of every iteration.
const DEFAULT_QUEUE_LENGTH: usize = 2;

/// Central function to format a file
pub fn format_file(
old_text: &str,
Expand All @@ -29,7 +35,7 @@ pub fn format_file(

// Initialise
let mut state = State::new();
let mut queue: Vec<(usize, String)> = vec![];
let mut queue: VecDeque<(usize, String)> = vec![].into();
let mut new_text = String::with_capacity(2 * old_text.len());

// Select the character used for indentation.
Expand All @@ -38,8 +44,15 @@ pub fn format_file(
TabChar::Space => " ",
};

loop {
if let Some((linum_old, mut line)) = queue.pop() {
'main: loop {
// Add more lines to the queue if there aren't two
for _ in 0..DEFAULT_QUEUE_LENGTH.saturating_sub(queue.len()) {
if let Some((linum_old, line)) = old_lines.next() {
queue.push_back((linum_old, line.to_string()));
}
}

if let Some((linum_old, mut line)) = queue.pop_front() {
// Read the patterns present on this line.
let pattern = Pattern::new(&line);

Expand All @@ -63,8 +76,8 @@ pub fn format_file(
// Split the line into two ...
let (this_line, next_line) =
split_line(&line, &temp_state, file, args, logs);
// ... and queue the second part for formatting.
queue.push((linum_old, next_line.to_string()));
// ... and add the second part to the front of the queue for formatting.
queue.push_front((linum_old, next_line.to_string()));
line = this_line.to_string();
}

Expand Down Expand Up @@ -98,13 +111,49 @@ pub fn format_file(
if let Some([this_line, next_line_start, next_line]) =
wrapped_lines
{
queue.push((
queue.push_front((
linum_old,
[next_line_start, next_line].concat(),
));
queue.push((linum_old, this_line.to_string()));
continue;
queue.push_front((linum_old, this_line.to_string()));
continue 'main;
}
} else if let Some(rewrap_point) = can_rewrap(
line.trim_start(),
&pattern,
queue.front().map(|(_, next_line)| next_line.as_str()),
indent_length,
args,
) {
// Remove the next line from the queue and replace it after
// removing the re-wrapped text.
let (linum_old, next_line) = queue.pop_front().unwrap(); // Doesn't panic because we can re-wrap.

let trimmed_next_line = next_line.trim_start();

// Append the re-wrapped words to the current line
line = [
line.as_str(),
" ",
&trimmed_next_line[0..rewrap_point],
]
.concat();

// Select the line left after re-wrapping
let next_line =
trimmed_next_line[rewrap_point..].trim_start();

// Add to the queue if there is text left in the next line
if !next_line.is_empty() {
queue.push_front((linum_old, next_line.to_owned()));
}

// Push the current line in the queue for further potential
// re-wrapping
queue.push_front((linum_old, line));

// Continue the loop to avoid writing the current line for now
continue;
}

// Lastly, apply the indent if the line didn't need wrapping.
Expand All @@ -116,10 +165,10 @@ pub fn format_file(
new_text.push_str(&line);
new_text.push_str(LINE_END);
state.linum_new += 1;
} else if let Some((linum_old, line)) = old_lines.next() {
queue.push((linum_old, line.to_string()));
} else {
break;
// If there are no lines in `queue`, then `old_lines` has been entirely consumed and it's safe to break the
// main loop.
break 'main;
}
}

Expand Down Expand Up @@ -197,40 +246,6 @@ impl State {
}
}

/// Record whether a line contains certain patterns to avoid recomputing.
///
/// Every field is a plain flag filled in by `Pattern::new`.
pub struct Pattern {
/// Whether a begin environment pattern is present
pub contains_env_begin: bool,
/// Whether an end environment pattern is present
pub contains_env_end: bool,
/// Whether an item pattern is present
pub contains_item: bool,
/// Whether a splitting pattern is present; when this is `false`,
/// `Pattern::new` leaves all of the other flags `false` as well
pub contains_splitting: bool,
}

impl Pattern {
    /// Build the pattern flags for the string `s`.
    ///
    /// The splitting regex acts as a gate: the individual substring checks
    /// only run when it matches, otherwise every flag is `false`.
    pub fn new(s: &str) -> Self {
        let contains_splitting = RE_SPLITTING.is_match(s);

        // `&&` short-circuits, so the substring searches are skipped entirely
        // for lines on which the splitting regex does not match.
        Self {
            contains_env_begin: contains_splitting && s.contains(ENV_BEGIN),
            contains_env_end: contains_splitting && s.contains(ENV_END),
            contains_item: contains_splitting && s.contains(ITEM),
            contains_splitting,
        }
    }
}

/// Ensure that the indentation returns to zero at the end of the file
const fn indents_return_to_zero(state: &State) -> bool {
state.indent.actual == 0
Expand Down
1 change: 1 addition & 0 deletions src/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::args::*;
use crate::comments::*;
use crate::format::*;
use crate::logging::*;
use crate::pattern::Pattern;
use crate::regexes::*;
use core::cmp::max;
use log::Level;
Expand Down
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ mod format;
mod ignore;
mod indent;
mod logging;
mod pattern;
mod read;
mod regexes;
mod subs;
Expand Down
48 changes: 48 additions & 0 deletions src/pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//! Detecting patterns within lines

use crate::regexes::*;

/// Record whether a line contains certain patterns to avoid recomputing.
///
/// The default value has every flag set to `false`. The common comparison
/// and formatting traits are derived so that a `Pattern` can be printed in
/// diagnostics and compared in tests.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Pattern {
    /// Whether a begin environment pattern is present
    pub contains_env_begin: bool,
    /// Whether an end environment pattern is present
    pub contains_env_end: bool,
    /// Whether an item pattern is present
    pub contains_item: bool,
    /// Whether a splitting pattern is present
    pub contains_splitting: bool,
}

impl Pattern {
    /// Build the pattern flags for the string `s`.
    ///
    /// The splitting regex is tested first. When it does not match, the
    /// all-`false` default is returned without any further checks.
    pub fn new(s: &str) -> Self {
        // No splitting match: nothing else is looked for on this line.
        if !RE_SPLITTING.is_match(s) {
            return Self::default();
        }

        Self {
            contains_env_begin: s.contains(ENV_BEGIN),
            contains_env_end: s.contains(ENV_END),
            contains_item: s.contains(ITEM),
            contains_splitting: true,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::Pattern;

    /// A line holding a begin, an end and an item marker must set every flag.
    #[test]
    fn new_pattern() {
        let line = "\\begin{enumerate} \\end{enumerate} \\item ";
        let Pattern {
            contains_env_begin,
            contains_env_end,
            contains_item,
            contains_splitting,
        } = Pattern::new(line);

        assert!(contains_env_begin);
        assert!(contains_env_end);
        assert!(contains_item);
        assert!(contains_splitting);
    }
}
1 change: 1 addition & 0 deletions src/subs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::args::*;
use crate::comments::*;
use crate::format::*;
use crate::logging::*;
use crate::pattern::Pattern;
use crate::regexes::*;
use crate::LINE_END;
use log::Level;
Expand Down
21 changes: 17 additions & 4 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,20 @@ fn test_target() {
assert!(!fail, "Some tests failed");
}

/// Format every file found in the large source directory and compare it with
/// the file of the same name in the large target directory, stopping at the
/// first file for which `test_file` reports a failure.
#[test]
#[ignore = "large test files ignored by default."]
fn test_large_source() {
    for file in read_files_from_dir("./tests/large/source/") {
        let source_path = format!("tests/large/source/{file}");
        let target_path = format!("tests/large/target/{file}");
        assert!(test_file(&source_path, &target_path), "Failed in {file}");
    }
}

#[test]
#[ignore]
fn test_short() {
Expand All @@ -102,22 +116,21 @@ fn test_short() {
//"puthesis.cls",
//"quiver.sty",
//"readme.tex",
"rewrap.tex",
//"sections.tex",
"short_document.tex",
// "short_document.tex",
//"tikz_network.sty",
//"unicode.tex",
//"verbatim.tex",
//"wgu-cv.cls",
//"wrap.tex",
];
let mut fail = false;
for file in files {
if !test_file(
&format!("tests/source/{file}"),
&format!("tests/target/{file}"),
) {
fail = true;
panic!("Failed in {file}");
}
}
assert!(!fail, "Some tests failed");
}
1 change: 1 addition & 0 deletions src/verbatim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use crate::format::*;
use crate::logging::*;
use crate::pattern::Pattern;
use crate::regexes::*;
use log::Level::Warn;

Expand Down
Loading
Loading