From 0df1d9d97b278710bbf0424281313d4682805e48 Mon Sep 17 00:00:00 2001 From: rzvxa <3788964+rzvxa@users.noreply.github.com> Date: Thu, 5 Sep 2024 05:59:22 +0000 Subject: [PATCH] fix(ast, codegen, linter): panics in fixers. (#5431) Closes #5434 https://github.com/oxc-project/oxlint-ecosystem-ci/actions/runs/10685877915/job/29619941099 --- crates/oxc_ast/src/ast_impl/literal.rs | 8 +++-- crates/oxc_codegen/src/annotation_comment.rs | 6 ++-- crates/oxc_codegen/src/gen.rs | 9 ++++-- crates/oxc_codegen/src/lib.rs | 23 +++++++++++---- .../oxc_codegen/tests/integration/tester.rs | 11 +++++++ crates/oxc_codegen/tests/integration/unit.rs | 29 ++++++++++++++++++- crates/oxc_linter/src/fixer/mod.rs | 1 + .../src/rules/eslint/no_control_regex.rs | 2 +- .../rules/eslint/no_empty_character_class.rs | 2 +- .../src/rules/eslint/no_regex_spaces.rs | 1 + .../src/rules/unicorn/no_hex_escape.rs | 2 +- .../unicorn/prefer_string_replace_all.rs | 1 + .../unicorn/prefer_string_starts_ends_with.rs | 1 + crates/oxc_prettier/src/format/mod.rs | 2 +- 14 files changed, 79 insertions(+), 19 deletions(-) diff --git a/crates/oxc_ast/src/ast_impl/literal.rs b/crates/oxc_ast/src/ast_impl/literal.rs index 9f7bd619e6294..3d968a433d3a0 100644 --- a/crates/oxc_ast/src/ast_impl/literal.rs +++ b/crates/oxc_ast/src/ast_impl/literal.rs @@ -6,6 +6,7 @@ use crate::ast::*; use std::{ + borrow::Cow, fmt, hash::{Hash, Hasher}, }; @@ -132,10 +133,11 @@ impl<'a> RegExpPattern<'a> { self.len() == 0 } - pub fn source_text(&self, source_text: &'a str) -> &'a str { + pub fn source_text(&self, source_text: &'a str) -> Cow { match self { - Self::Raw(raw) | Self::Invalid(raw) => raw, - Self::Pattern(pat) => pat.span.source_text(source_text), + Self::Raw(raw) | Self::Invalid(raw) => Cow::Borrowed(raw), + Self::Pattern(pat) if pat.span.is_unspanned() => Cow::Owned(pat.to_string()), + Self::Pattern(pat) => Cow::Borrowed(pat.span.source_text(source_text)), } } diff --git a/crates/oxc_codegen/src/annotation_comment.rs b/crates/oxc_codegen/src/annotation_comment.rs index e42cd70f4f9c0..1525692f2abcb 100644 --- a/crates/oxc_codegen/src/annotation_comment.rs +++ b/crates/oxc_codegen/src/annotation_comment.rs @@ -54,6 +54,7 @@ impl<'a> Codegen<'a> { return vec![]; } let mut latest_comment_start = node_start; + let source_text = self.source_text.unwrap_or_default(); let mut ret = self .get_leading_comments(self.latest_consumed_comment_end, node_start) .rev() @@ -61,15 +62,14 @@ impl<'a> Codegen<'a> { .take_while(|comment| { let comment_end = comment.real_span_end(); let range_content = - &self.source_text[comment_end as usize..latest_comment_start as usize]; + &source_text[comment_end as usize..latest_comment_start as usize]; let all_whitespace = range_content.chars().all(char::is_whitespace); latest_comment_start = comment.real_span_start(); all_whitespace }) .filter_map(|comment| { - let source_code = self.source_text; let comment_content = - &source_code[comment.span.start as usize..comment.span.end as usize]; + &source_text[comment.span.start as usize..comment.span.end as usize]; if let Some(m) = MATCHER.find_iter(&comment_content).next() { let annotation_kind = match m.value() { 0 | 1 => AnnotationKind::NO_SIDE_EFFECTS, diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index 80e887818ec00..80ab2cc676e8a 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -1,4 +1,4 @@ -use std::ops::Not; +use std::{borrow::Cow, ops::Not}; use oxc_allocator::{Box, Vec}; #[allow(clippy::wildcard_imports)] @@ -1234,7 +1234,10 @@ impl<'a> Gen for RegExpLiteral<'a> { fn gen(&self, p: &mut Codegen, _ctx: Context) { p.add_source_mapping(self.span.start); let last = p.peek_nth(0); - let pattern_text = self.regex.pattern.source_text(p.source_text); + let pattern_text = p.source_text.map_or_else( + || Cow::Owned(self.regex.pattern.to_string()), + |src| self.regex.pattern.source_text(src), + ); // Avoid forming a single-line comment or " Gen for RegExpLiteral<'a> { p.print_hard_space(); } p.print_char(b'/'); - p.print_str(pattern_text); + p.print_str(pattern_text.as_ref()); p.print_char(b'/'); p.print_str(self.regex.flags.to_string().as_str()); p.prev_reg_exp_end = p.code().len(); diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index 5f254ce2ab35d..6f977574e3eb8 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -63,7 +63,8 @@ pub struct Codegen<'a> { options: CodegenOptions, comment_options: CommentOptions, - source_text: &'a str, + /// Original source code of the AST + source_text: Option<&'a str>, trivias: Trivias, @@ -131,7 +132,7 @@ impl<'a> Codegen<'a> { Self { options: CodegenOptions::default(), comment_options: CommentOptions::default(), - source_text: "", + source_text: None, trivias: Trivias::default(), mangler: None, code: vec![], @@ -169,6 +170,15 @@ impl<'a> Codegen<'a> { self } + /// Adds the source text of the original AST, It is used with comments or for improving the + /// generated output. + #[must_use] + pub fn with_source_text(mut self, source_text: &'a str) -> Self { + self.source_text = Some(source_text); + self + } + + /// Also sets the [Self::with_source_text] #[must_use] pub fn enable_comment( mut self, @@ -176,10 +186,9 @@ impl<'a> Codegen<'a> { trivias: Trivias, options: CommentOptions, ) -> Self { - self.source_text = source_text; self.trivias = trivias; self.comment_options = options; - self + self.with_source_text(source_text) } #[must_use] @@ -539,8 +548,12 @@ impl<'a> Codegen<'a> { /// Avoid issue related to rustc borrow checker . /// Since if you want to print a range of source code, you need to borrow the source code /// as immutable first, and call the [Self::print_str] which is a mutable borrow. + /// + /// # Panics + /// If `self.source_text` isn't set. fn print_range_of_source_code(&mut self, range: Range) { - self.code.extend_from_slice(self.source_text[range].as_bytes()); + let source_text = self.source_text.expect("expect `Codegen::source_text` to be set."); + self.code.extend_from_slice(source_text[range].as_bytes()); } fn get_leading_comments( diff --git a/crates/oxc_codegen/tests/integration/tester.rs b/crates/oxc_codegen/tests/integration/tester.rs index cc2955aa28516..78207e0189949 100644 --- a/crates/oxc_codegen/tests/integration/tester.rs +++ b/crates/oxc_codegen/tests/integration/tester.rs @@ -21,6 +21,17 @@ pub fn test(source_text: &str, expected: &str) { ); } +pub fn test_without_source(source_text: &str, expected: &str) { + let source_type = SourceType::default().with_module(true).with_jsx(true); + let allocator = Allocator::default(); + let ret = Parser::new(&allocator, source_text, source_type).parse(); + let result = CodeGenerator::new().build(&ret.program).source_text; + assert_eq!( + result, expected, + "\nfor source {source_text:?}\nexpect {expected:?}\ngot {result:?}\nwithout providing the original code." + ); +} + pub fn test_minify(source_text: &str, expected: &str) { let source_type = SourceType::default().with_module(true).with_jsx(true); let allocator = Allocator::default(); diff --git a/crates/oxc_codegen/tests/integration/unit.rs b/crates/oxc_codegen/tests/integration/unit.rs index 8c559c66ba50a..b4066f662be55 100644 --- a/crates/oxc_codegen/tests/integration/unit.rs +++ b/crates/oxc_codegen/tests/integration/unit.rs @@ -1,4 +1,4 @@ -use crate::tester::{test, test_minify}; +use crate::tester::{test, test_minify, test_without_source}; #[test] fn module_decl() { @@ -63,6 +63,33 @@ fn unicode_escape() { test("console.log('🧑‍🤝‍🧑');", "console.log(\"🧑‍🤝‍🧑\");\n"); } +#[test] +fn regex() { + fn test_all(source: &str, expect: &str, minify: &str) { + test(source, expect); + test_minify(source, minify); + test_without_source(source, expect); + } + test_all("/regex/giv", "/regex/giv;\n", "/regex/giv;"); + test_all( + r"/(.)(.)(.)(.)(.)(.)(.)(.)\8\8/", + "/(.)(.)(.)(.)(.)(.)(.)(.)\\8\\8/;\n", + "/(.)(.)(.)(.)(.)(.)(.)(.)\\8\\8/;", + ); + + test_all( + r"/\n\cM\0\x41\u{1f600}\./u", + "/\\n\\cM\\0\\x41\\u{1f600}\\./u;\n", + "/\\n\\cM\\0\\x41\\u{1f600}\\./u;", + ); + test_all(r"/\n\cM\0\x41\./u", "/\\n\\cM\\0\\x41\\./u;\n", "/\\n\\cM\\0\\x41\\./u;"); + test_all( + r"/\n\cM\0\x41\u1234\./", + "/\\n\\cM\\0\\x41\\u1234\\./;\n", + "/\\n\\cM\\0\\x41\\u1234\\./;", + ); +} + #[test] fn comma() { test_minify("1, 2, 3", "1,2,3;"); diff --git a/crates/oxc_linter/src/fixer/mod.rs b/crates/oxc_linter/src/fixer/mod.rs index e30874f48fc3e..88e644ac796e4 100644 --- a/crates/oxc_linter/src/fixer/mod.rs +++ b/crates/oxc_linter/src/fixer/mod.rs @@ -171,6 +171,7 @@ impl<'c, 'a: 'c> RuleFixer<'c, 'a> { #[allow(clippy::unused_self)] pub fn codegen(self) -> CodeGenerator<'a> { CodeGenerator::new() + .with_source_text(self.source_text()) .with_options(CodegenOptions { single_quote: true, ..CodegenOptions::default() }) } diff --git a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs index 4c7e03c0c02c0..4a9de18655940 100644 --- a/crates/oxc_linter/src/rules/eslint/no_control_regex.rs +++ b/crates/oxc_linter/src/rules/eslint/no_control_regex.rs @@ -68,7 +68,7 @@ impl Rule for NoControlRegex { let mut violations: Vec<&str> = Vec::new(); let pattern = pattern.as_ref(); let pattern_text = pattern.source_text(context.source_text()); - for matched_ctl_pattern in control_patterns(pattern_text) { + for matched_ctl_pattern in control_patterns(pattern_text.as_ref()) { let ctl = matched_ctl_pattern.as_str(); // check for an even number of backslashes, since these will diff --git a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs index 203d1a1ad6cef..69e71e51e27b6 100644 --- a/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs +++ b/crates/oxc_linter/src/rules/eslint/no_empty_character_class.rs @@ -50,7 +50,7 @@ impl Rule for NoEmptyCharacterClass { if let AstKind::RegExpLiteral(lit) = node.kind() { if !NO_EMPTY_CLASS_REGEX_PATTERN - .is_match(lit.regex.pattern.source_text(ctx.source_text())) + .is_match(lit.regex.pattern.source_text(ctx.source_text()).as_ref()) { ctx.diagnostic(no_empty_character_class_diagnostic(lit.span)); } diff --git a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs index 3e37ac8910902..f38e62e3c4612 100644 --- a/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs +++ b/crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs @@ -69,6 +69,7 @@ impl Rule for NoRegexSpaces { impl NoRegexSpaces { fn find_literal_to_report(literal: &RegExpLiteral, ctx: &LintContext) -> Option { let pattern_text = literal.regex.pattern.source_text(ctx.source_text()); + let pattern_text = pattern_text.as_ref(); if Self::has_exempted_char_class(pattern_text) { return None; } diff --git a/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs b/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs index 198df27a8a6c7..1531b9e59f70e 100644 --- a/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs +++ b/crates/oxc_linter/src/rules/unicorn/no_hex_escape.rs @@ -86,7 +86,7 @@ impl Rule for NoHexEscape { } AstKind::RegExpLiteral(regex) => { if let Some(fixed) = - check_escape(regex.regex.pattern.source_text(ctx.source_text())) + check_escape(regex.regex.pattern.source_text(ctx.source_text()).as_ref()) { #[allow(clippy::cast_possible_truncation)] ctx.diagnostic_with_fix(no_hex_escape_diagnostic(regex.span), |fixer| { diff --git a/crates/oxc_linter/src/rules/unicorn/prefer_string_replace_all.rs b/crates/oxc_linter/src/rules/unicorn/prefer_string_replace_all.rs index ca3f29ef3cd10..5b70f40961f83 100644 --- a/crates/oxc_linter/src/rules/unicorn/prefer_string_replace_all.rs +++ b/crates/oxc_linter/src/rules/unicorn/prefer_string_replace_all.rs @@ -127,6 +127,7 @@ fn get_pattern_replacement<'a>( } let pattern_text = reg_exp_literal.regex.pattern.source_text(ctx.source_text()); + let pattern_text = pattern_text.as_ref(); if !is_simple_string(pattern_text) { return None; } diff --git a/crates/oxc_linter/src/rules/unicorn/prefer_string_starts_ends_with.rs b/crates/oxc_linter/src/rules/unicorn/prefer_string_starts_ends_with.rs index 1f20c2dbc9217..90d85a884a8b1 100644 --- a/crates/oxc_linter/src/rules/unicorn/prefer_string_starts_ends_with.rs +++ b/crates/oxc_linter/src/rules/unicorn/prefer_string_starts_ends_with.rs @@ -75,6 +75,7 @@ impl Rule for PreferStringStartsEndsWith { }; let pattern_text = regex.regex.pattern.source_text(ctx.source_text()); + let pattern_text = pattern_text.as_ref(); let Some(err_kind) = check_regex(regex, pattern_text) else { return; diff --git a/crates/oxc_prettier/src/format/mod.rs b/crates/oxc_prettier/src/format/mod.rs index f421967853ccf..ee1a4892f10b1 100644 --- a/crates/oxc_prettier/src/format/mod.rs +++ b/crates/oxc_prettier/src/format/mod.rs @@ -1416,7 +1416,7 @@ impl<'a> Format<'a> for RegExpLiteral<'a> { fn format(&self, p: &mut Prettier<'a>) -> Doc<'a> { let mut parts = p.vec(); parts.push(ss!("/")); - parts.push(p.str(self.regex.pattern.source_text(p.source_text))); + parts.push(p.str(self.regex.pattern.source_text(p.source_text).as_ref())); parts.push(ss!("/")); parts.push(format!(p, self.regex.flags)); Doc::Array(parts)