Skip to content

Commit

Permalink
cli: add simple flag suggestions
Browse files Browse the repository at this point in the history
We look for similar flag names via Jaccard index on ngrams. In my
experience this tends to work better than Levenshtein or other edit
distance based metrics. Principally because it allows for out-of-order
suggestions. For example, --case-smart will result in a suggestion for
--smart-case, even though the edit distance between them is pretty big.

This is something Clap did for us. I initially thought it wasn't
necessary to add this back in, but I realized it wouldn't be much work
and might actually be helpful to folks.
  • Loading branch information
BurntSushi committed Nov 26, 2023
1 parent b0f6645 commit 56af4d4
Showing 1 changed file with 86 additions and 2 deletions.
88 changes: 86 additions & 2 deletions crates/core/flags/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Parses command line arguments into a structured and typed representation.
*/

use std::ffi::OsString;
use std::{borrow::Cow, collections::BTreeSet, ffi::OsString};

use anyhow::Context;

Expand Down Expand Up @@ -263,7 +263,11 @@ impl Parser {
anyhow::bail!("unrecognized flag -{name}")
}
FlagLookup::UnrecognizedLong(name) => {
anyhow::bail!("unrecognized flag --{name}")
let mut msg = format!("unrecognized flag --{name}");
if let Some(suggest_msg) = suggest(&name) {
msg = format!("{msg}\n\n{suggest_msg}");
}
anyhow::bail!("{msg}")
}
};
let value = if matches!(mat.kind, FlagInfoKind::Negated) {
Expand Down Expand Up @@ -390,3 +394,83 @@ impl FlagMap {
self.map.get(name).copied()
}
}

/// Possibly return a message suggesting flags similar in the name to the one
/// given.
///
/// The one given should be a flag given by the user (without the leading
/// dashes) that was unrecognized. This attempts to find existing flags that
/// are similar to the one given.
fn suggest(unrecognized: &str) -> Option<String> {
let similars = find_similar_names(unrecognized);
if similars.is_empty() {
return None;
}
let list = similars
.into_iter()
.map(|name| format!("--{name}"))
.collect::<Vec<String>>()
.join(", ");
Some(format!("similar flags that are available: {list}"))
}

/// Return a sequence of names similar to the unrecognized name given.
fn find_similar_names(unrecognized: &str) -> Vec<&'static str> {
// The jaccard similarity threshold at which we consider two flag names
// similar enough that it's worth suggesting it to the end user.
//
// This value was determined by some ad hoc experimentation. It might need
// further tweaking.
const THRESHOLD: f64 = 0.4;

let mut similar = vec![];
let bow_given = ngrams(unrecognized);
for &flag in FLAGS.iter() {
let name = flag.name_long();
let bow = ngrams(name);
if jaccard_index(&bow_given, &bow) >= THRESHOLD {
similar.push(name);
}
if let Some(name) = flag.name_negated() {
let bow = ngrams(name);
if jaccard_index(&bow_given, &bow) >= THRESHOLD {
similar.push(name);
}
}
for name in flag.aliases() {
let bow = ngrams(name);
if jaccard_index(&bow_given, &bow) >= THRESHOLD {
similar.push(name);
}
}
}
similar
}

/// A "bag of words" is a set of ngrams.
type BagOfWords<'a> = BTreeSet<Cow<'a, [u8]>>;

/// Returns the jaccard index (a measure of similarity) between sets of ngrams.
fn jaccard_index(ngrams1: &BagOfWords<'_>, ngrams2: &BagOfWords<'_>) -> f64 {
let union = u32::try_from(ngrams1.union(ngrams2).count())
.expect("fewer than u32::MAX flags");
let intersection = u32::try_from(ngrams1.intersection(ngrams2).count())
.expect("fewer than u32::MAX flags");
f64::from(intersection) / f64::from(union)
}

/// Returns all 3-grams in the slice given.
///
/// If the slice doesn't contain a 3-gram, then one is artificially created by
/// padding it out with a character that will never appear in a flag name.
fn ngrams(flag_name: &str) -> BagOfWords<'_> {
// We only allow ASCII flag names, so we can just use bytes.
let slice = flag_name.as_bytes();
let seq: Vec<Cow<[u8]>> = match slice.len() {
0 => vec![Cow::Owned(b"!!!".to_vec())],
1 => vec![Cow::Owned(vec![slice[0], b'!', b'!'])],
2 => vec![Cow::Owned(vec![slice[0], slice[1], b'!'])],
_ => slice.windows(3).map(Cow::Borrowed).collect(),
};
BTreeSet::from_iter(seq)
}

0 comments on commit 56af4d4

Please sign in to comment.