Skip to content

Commit

Permalink
Move CLI flag extraction into ArgSplitter (#21943)
Browse files Browse the repository at this point in the history
Previously, for legacy reasons, we had a separate pass for
processing flags, while the ArgSplitter dealt with finding 
goals and specs. This turned out to be not just a code
redundancy issue, but it caused a bug: #21930 . 

This PR cleans up the redundancy and smooths out
some ragged legacy edges in the Rust options parser:

- Employs uniform terminology: Now all CLI args 
  (specs, goals, flags) are referred to as `args`,
  while args that set option values are referred to
  as `flags`. Previously we used `flags` and `args`
  somewhat interchangeably to refer to the latter,
  but also `args` to refer to the former. This clears
  up the ambiguity.
- Notably, the `Arg` struct is now called `Flag`, and
  lives in `flags.rs`. 
- Gets rid of the previous `Args` struct and its  logic, 
  reusing its name to now be just a simple holder of
  arg strings prior to parsing.
- Moves the flag-gathering logic, previously in the old `Args`,
   into `ArgSplitter`.
- Modify tests appropriately. Including removing testing
  of arbitrary short (`-f`) flags, which happened to work 
  before, even though we only officially support a known 
  small set of such flags (`-v`, `-h`, `-l`).
- Regression test for #21930. 
- Make passthru args an Option, so we can distinguish between
  `pants ...` and `pants ... --` (i.e., no passthru separator vs 
  a passthru separator with no args after it).

---------

Co-authored-by: Huon Wilson <[email protected]>
  • Loading branch information
benjyw and huonw authored Feb 12, 2025
1 parent a96cffd commit dd87b85
Show file tree
Hide file tree
Showing 12 changed files with 514 additions and 386 deletions.
2 changes: 1 addition & 1 deletion docs/notes/2.25.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Thank you to [Klayvio](https://www.klaviyo.com/) and [Normal Computing](https://
- [Fixed](https://github.com/pantsbuild/pants/pull/21694) bug where an `archive` target is unable to produce a ZIP file with no extension (see [#21693](https://github.com/pantsbuild/pants/issues/21693) for more info).
- `[subprocess-environment].env_vars` and `extra_env_vars` (on many subsystems and targets) now supports a generalised glob syntax using Python [fnmatch](https://docs.python.org/3/library/fnmatch.html) to construct patterns like `AWS_*`, `TF_*`, and `S2TESTS_*`.
- Suspicious values now generate a warning instead of hard error when using the `<PATH>` special value to inject shell `PATH` values to the `system-binaries` subsystem.

- [Fixed](https://github.com/pantsbuild/pants/pull/21943) a bug where negative target specs (e.g., `-folder::`) were not recognized on the command line.
### Remote Caching/Execution

Pants now sends a `user-agent` header with every request to a remote store or a remote execution service,
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/option/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Tuple

from pants.option.scope import GLOBAL_SCOPE
from pants.util.strutil import softwrap
from pants.util.strutil import pluralize, softwrap


class OptionsError(Exception):
Expand Down Expand Up @@ -116,7 +116,7 @@ def __init__(self, flags: Tuple[str, ...], arg_scope: str):
self.flags = flags
self.arg_scope = arg_scope
scope = f"scope {self.arg_scope}" if self.arg_scope else "global scope"
msg = f"Unknown flags {', '.join(self.flags)} on {scope}"
msg = f"Unknown {pluralize(len(self.flags), 'flag')} {', '.join(self.flags)} on {scope}"
super().__init__(msg)


Expand Down
47 changes: 26 additions & 21 deletions src/python/pants/option/options_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ def create_options(
args=["pants", *(args or ())],
env=env or {},
config_sources=[FileContent("pants.toml", toml.dumps(config or {}).encode())],
known_scope_infos=[*(ScopeInfo(scope) for scope in scopes), *(extra_scope_infos or ())],
known_scope_infos=[
*(ScopeInfo(scope, is_goal=scope != GLOBAL_SCOPE) for scope in scopes),
*(extra_scope_infos or ()),
],
)
register_fn(options)
return options
Expand Down Expand Up @@ -478,26 +481,28 @@ def _parse(


_known_scope_infos = [
ScopeInfo(scope)
for scope in (
GLOBAL_SCOPE,
"anotherscope",
"compile",
"compile.java",
"stale",
"test",
"test.junit",
"passconsumer",
"simple",
"simple-dashed",
"scoped.a.bit",
"scoped.and-dashed",
"fromfile",
"fingerprinting",
"enum-opt",
"separate-enum-opt-scope",
"other-enum-scope",
)
ScopeInfo(GLOBAL_SCOPE),
*[
ScopeInfo(scope, is_goal=True)
for scope in (
"anotherscope",
"compile",
"compile.java",
"stale",
"test",
"test.junit",
"passconsumer",
"simple",
"simple-dashed",
"scoped.a.bit",
"scoped.and-dashed",
"fromfile",
"fingerprinting",
"enum-opt",
"separate-enum-opt-scope",
"other-enum-scope",
)
],
]


Expand Down
93 changes: 77 additions & 16 deletions src/rust/engine/options/src/arg_splitter.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
// Copyright 2025 Pants project contributors (see CONTRIBUTORS.md).
// Licensed under the Apache License, Version 2.0 (see LICENSE).

use crate::GoalInfo;
use crate::flags::Flag;
use crate::{GoalInfo, Scope};
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use std::env;
use std::path::{Path, PathBuf};

// These are the names for the built in goals to print help message when there is no goal, or any
Expand All @@ -15,8 +17,36 @@ pub const UNKNOWN_GOAL_NAME: &str = "__unknown_goal";

lazy_static! {
static ref SPEC_RE: Regex = Regex::new(r"[/\\.:*#]").unwrap();
static ref SINGLE_DASH_FLAGS: HashSet<&'static str> =
HashSet::from(["-ltrace", "-ldebug", "-linfo", "-lwarn", "-lerror", "-h", "-v", "-V"]);
static ref SINGLE_DASH_FLAGS: HashSet<&'static str> = HashSet::from([
"-h", "-v", "-V", "-ltrace", "-ldebug", "-linfo", "-lwarn", "-lerror", "-l=trace",
"-l=debug", "-l=info", "-l=warn", "-l=error",
]);
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Args {
pub(crate) arg_strings: Vec<String>,
}

impl Args {
// Create an Args instance with the provided args, which must *not* include the
// argv[0] process name.
pub fn new<I: IntoIterator<Item = String>>(arg_strs: I) -> Self {
Self {
arg_strings: arg_strs.into_iter().collect(),
}
}

pub fn argv() -> Self {
let mut args = env::args().collect::<Vec<_>>().into_iter();
args.next(); // Consume the process name (argv[0]).
// TODO: In Pants's own integration tests we may invoke Pants in a subprocess via
// `python -m pants` or `python path/to/__main__.py` or similar. So
// skipping argv[0] may not be sufficient to get just the set of args to split.
// In practice our tests pass despite these extra args being interpreted as specs
// or goals, but that is skating on thin ice.
Self::new(args.collect::<Vec<_>>())
}
}

// The details of a Pants invocation command, not including option flags.
Expand All @@ -26,7 +56,8 @@ pub struct PantsCommand {
pub goals: Vec<String>, // Requested known goals.
pub unknown_goals: Vec<String>, // Any requested but unknown goals.
pub specs: Vec<String>, // What to run against, e.g. targets or files/dirs.
pub passthru: Vec<String>, // Any remaining args specified after a -- separator.
pub(crate) flags: Vec<Flag>, // Option values set via flag.
pub passthru: Option<Vec<String>>, // Any remaining args specified after a -- separator.
}

impl PantsCommand {
Expand All @@ -36,7 +67,8 @@ impl PantsCommand {
goals: vec![],
unknown_goals: vec![],
specs: vec![],
passthru: vec![],
flags: vec![],
passthru: None,
}
}

Expand Down Expand Up @@ -70,17 +102,18 @@ impl ArgSplitter {
}

// Split the given args, which must *not* include the argv[0] process name.
pub fn split_args(&self, args: Vec<String>) -> PantsCommand {
pub fn split_args(&self, args: Args) -> PantsCommand {
let mut builtin_or_auxiliary_goal: Option<String> = None;
let mut goals = vec![];
let mut unknown_goals: Vec<String> = vec![];
let mut specs = vec![];
let mut passthru = vec![];
let mut flags = vec![];
let mut passthru = None;
let mut scope = Scope::Global;

let mut unconsumed_args = args;
unconsumed_args.reverse();
let mut unconsumed_args = args.arg_strings.into_iter();

// Scan the args looking for goals and specs.
// Scan the args looking for goals, specs and flags.
// The one hard case is a single word like `foo` with no path- or target-like qualities
// (e.g., a slash or a colon). It could be a goal, or it could be a top-level directory.
// We disambiguate thus: If it is a known goal name, assume the user intended a goal.
Expand All @@ -90,7 +123,7 @@ impl ArgSplitter {
// changes to plugins (as they can introduce new goals) or changes on the filesystem.
// We might want to deprecate this behavior and consistently assume that these are goals,
// since the user can always add a `./` prefix to override.
while let Some(arg) = unconsumed_args.pop() {
while let Some(arg) = unconsumed_args.next() {
let goal_info = self.known_goals.get(&arg);
// Some special flags, such as `-v` and `--help`, are implemented as
// goal aliases, so we must check this before checking for any dash prefixes.
Expand All @@ -106,13 +139,38 @@ impl ArgSplitter {
} else {
goals.push(canonical_scope_name);
}
scope = Scope::Scope(arg.clone());
} else if arg == "--" {
// Arg is the passthru delimiter.
for item in unconsumed_args.drain(..) {
passthru.push(item);
let mut remainder = vec![];
for s in unconsumed_args.by_ref() {
remainder.push(s);
}
passthru = Some(remainder);
} else if arg.starts_with("--") {
let mut components = arg.splitn(2, '=');
let flag_name = components.next().unwrap();
flags.push(Flag {
context: scope.clone(),
key: flag_name.to_string(),
value: components.next().map(str::to_string),
});
} else if SINGLE_DASH_FLAGS.contains(arg.as_str()) {
let (flag_name, mut flag_value) = arg.split_at(2);
// We support both -ldebug and -l=debug, so strip that extraneous equals sign.
if let Some(stripped) = flag_value.strip_prefix('=') {
flag_value = stripped;
}
passthru.reverse();
} else if !(arg.starts_with("--") || SINGLE_DASH_FLAGS.contains(arg.as_str())) {
flags.push(Flag {
context: scope.clone(),
key: flag_name.to_string(),
value: if flag_value.is_empty() {
None
} else {
Some(flag_value.to_string())
},
});
} else {
// This is not a flag, so it must be an unknown goal or a spec (or a negative spec,
// which starts with a single dash, and we know is not a single dash flag).
if arg.starts_with("-")
Expand All @@ -121,9 +179,11 @@ impl ArgSplitter {
{
// Arg is a spec.
specs.push(arg);
// Revert to global context for any trailing flags.
scope = Scope::Global;
} else {
// Arg is an unknown goal.
unknown_goals.push(arg);
unknown_goals.push(arg.clone());
}
}
}
Expand All @@ -141,6 +201,7 @@ impl ArgSplitter {
goals,
unknown_goals,
specs,
flags,
passthru,
}
}
Expand Down
Loading

0 comments on commit dd87b85

Please sign in to comment.