From cccc06f0ffa99460ee6aa5a39160ee6d6c1b258d Mon Sep 17 00:00:00 2001 From: thejud Date: Fri, 11 Oct 2024 20:40:01 -0700 Subject: [PATCH] [guide] Add a RegexGuide (#2552) Co-authored-by: anjakefala --- visidata/features/regex.py | 10 ++-- visidata/guides/RegexGuide.md | 103 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 visidata/guides/RegexGuide.md diff --git a/visidata/features/regex.py b/visidata/features/regex.py index 94983aceb..3ec3990f0 100644 --- a/visidata/features/regex.py +++ b/visidata/features/regex.py @@ -126,12 +126,12 @@ def inputRegexSubst(vd, prompt): after=dict(type='regex-replace', prompt='replace: ', help=prompt)) -Sheet.addCommand(':', 'addcol-split', 'addColumnAtCursor(RegexColumn(makeRegexSplitter, cursorCol, inputRegex("split regex: ", type="regex-split")))', 'Add column split by regex') -Sheet.addCommand(';', 'addcol-capture', 'addColumnAtCursor(RegexColumn(makeRegexMatcher, cursorCol, inputRegex("capture regex: ", type="regex-capture")))', 'Add column captured by regex') +Sheet.addCommand(':', 'addcol-split', 'addColumnAtCursor(RegexColumn(makeRegexSplitter, cursorCol, inputRegex("split regex: ", type="regex-split")))', 'add column split by regex') +Sheet.addCommand(';', 'addcol-capture', 'addColumnAtCursor(RegexColumn(makeRegexMatcher, cursorCol, inputRegex("capture regex: ", type="regex-capture")))', 'add column captured by regex') -Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing regex with subst (may include \1 backrefs)') -Sheet.addCommand('g*', 'setcol-regex-subst', 'setValuesFromRegex([cursorCol], someSelectedRows, **inputRegexSubst("regex transform column"))', 'regex/subst - modify selected rows in current column, replacing regex with subst, (may include backreferences \\1 etc)') 
-Sheet.addCommand('gz*', 'setcol-regex-subst-all', 'setValuesFromRegex(visibleCols, someSelectedRows, **inputRegexSubst(f"regex transform {nVisibleCols} columns"))', 'modify selected rows in all visible columns, replacing regex with subst (may include \\1 backrefs)') +Sheet.addCommand('*', 'addcol-regex-subst', 'addColumnAtCursor(Column(cursorCol.name + "_re", getter=regexTransform(cursorCol, **inputRegexSubst("regex transform column"))))', 'add column derived from current column, replacing `search` regex with `replace` (may include \\1 backrefs)') +Sheet.addCommand('g*', 'setcol-regex-subst', 'setValuesFromRegex([cursorCol], someSelectedRows, **inputRegexSubst("regex transform column"))', 'modify selected rows in current column, replacing `search` regex with `replace`, (may include backreferences \\1 etc)') +Sheet.addCommand('gz*', 'setcol-regex-subst-all', 'setValuesFromRegex(visibleCols, someSelectedRows, **inputRegexSubst(f"regex transform {nVisibleCols} columns"))', 'modify selected rows in all visible columns, replacing `search` regex with `replace` (may include \\1 backrefs)') vd.addMenuItems(''' diff --git a/visidata/guides/RegexGuide.md b/visidata/guides/RegexGuide.md new file mode 100644 index 000000000..2c03149e6 --- /dev/null +++ b/visidata/guides/RegexGuide.md @@ -0,0 +1,103 @@ +--- +sheet: Sheet +--- +# Matching and Transforming Strings with Regex + +Some commands for selecting, searching, and transforming data accept a regular expression as input. 
+ +## Select Rows + +- {help.commands.select-col-regex} +- {help.commands.select-cols-regex} + +- {help.commands.unselect-col-regex} +- {help.commands.unselect-cols-regex} + +## Search + +- {help.commands.go-col-regex} + +- {help.commands.search-col} +- {help.commands.search-cols} + +- {help.commands.searchr-col} +- {help.commands.searchr-cols} + +- {help.commands.search-next} +- {help.commands.searchr-next} + +- {help.commands.search-keys} + +## Substitution + +- {help.commands.setcol-regex-subst} +- {help.commands.setcol-regex-subst-all} + +Press `Tab` to move between `search` and `replace` inputs. +An empty `replace` removes the matching string. + +## Column Creation + +- {help.commands.addcol-regex-subst} +- {help.commands.addcol-split} +- {help.commands.addcol-capture} + +## Examples + +### Split + +Sample input sheet **sales**: + + date price + ---------- ----- + 2024-09-01 30 + 2024-09-02 28 + 2024-09-03 100 + +1. [:code]:[/] (`addcol-split`) on the **date** column, followed by `-` to split on hyphens. + + date date_re price + ---------- ---------------- ----- + 2024-09-01 [3] 2024; 09; 01 30 + 2024-09-02 [3] 2024; 09; 02 28 + 2024-09-03 [3] 2024; 09; 03 100 + +Note that the results in the `date_re` column are lists of length 3. + +2. [:code]([/] (`expand-col`) to expand a column with lists into multiple columns with the list elements. + + date date_re[0] date_re[1] date_re[2] price + ---------- ---------- ---------- ---------- ----- + 2024-09-01 2024 09 01 30 + 2024-09-02 2024 09 02 28 + 2024-09-03 2024 09 03 100 + +### Substitution + +1. On the **date** column, [:code]*[/] (`addcol-regex-subst`) and type `-`, then `Tab` to "replace" and type `,`. Then `Enter` to replace all `-` with `,`. + + date date_re price + ---------- ---------- ----- + 2024-09-01 2024,09,01 30 + 2024-09-02 2024,09,02 28 + 2024-09-03 2024,09,03 100 + +### Capture + +1. On the **date** column, [:code];[/] (`addcol-capture`) and type `(\d\d\d\d)` to capture and pull out the year. 
+ + date date_re price + ---------- -------- ----- + 2024-09-01 [1] 2024 30 + 2024-09-02 [1] 2024 28 + 2024-09-03 [1] 2024 100 + +Note that the results in the `date_re` column are lists of length 1. + +2. [:code]([/] (`expand-col`) to expand a column with lists into multiple columns with the list elements. + + date date_re[0] price + ---------- ---------- ----- + 2024-09-01 2024 30 + 2024-09-02 2024 28 + 2024-09-03 2024 100