Skip to content

Commit

Permalink
fix: Fix for #141 regexes starting and ending with .*
Browse files Browse the repository at this point in the history
  • Loading branch information
en-milie committed Oct 2, 2024
1 parent 23057de commit 5b67a25
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,8 @@ private static String removeStartEndAnyChar(String regex) {
return regex;
}

// Remove leading unescaped '.*' or '*' patterns
regex = regex.replaceAll("^(?:(?<!\\\\)\\*+\\.*|\\.*(?<!\\\\)\\*+)", "");

// Remove trailing unescaped '.*' or '*' patterns
regex = regex.replaceAll("(?:(?<!\\\\)\\*+\\.*|\\.*(?<!\\\\)\\*+)$", "\\\\w*");
regex = regex.replaceAll("(?<!\\\\)\\.\\*$", "");

return regex;
}
Expand Down
63 changes: 48 additions & 15 deletions src/main/java/com/endava/cats/generator/simple/StringGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.util.Locale;
import java.util.Optional;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
Expand Down Expand Up @@ -186,35 +187,64 @@ public static String generate(String pattern, int min, int max) {
String cleanedPattern = cleanPattern(pattern);
String flattenedPattern = RegexFlattener.flattenRegex(cleanedPattern);

List<Supplier<Optional<String>>> attempts = List.of(
() -> generateString(pattern, min, max, cleanedPattern, flattenedPattern),
() -> generateString(cleanedPattern, min, max, cleanedPattern, flattenedPattern),
() -> generateString(cleanedPattern, min, max, cleanedPattern, cleanedPattern),
() -> generateString(flattenedPattern, min, max, cleanedPattern, flattenedPattern),
() -> generateString(flattenedPattern, min, max, flattenedPattern, flattenedPattern)
);

return attempts.stream()
.flatMap(attempt -> attempt.get().stream())
.findFirst()
.orElseThrow(() -> new IllegalArgumentException(
String.format("Could not generate a string for pattern %s with min %d and max %d", pattern, min, max)
));
}

private static Optional<String> generateString(String pattern, int min, int max, String cleanedPattern, String flattenedPattern) {
String valueBasedOnSimpleRegexes = tryGenerateWithSimpleRegexes(pattern, min, max, cleanedPattern);

if (valueBasedOnSimpleRegexes != null) {
return valueBasedOnSimpleRegexes;
return Optional.of(valueBasedOnSimpleRegexes);
}

GeneratorParams generatorParams = new GeneratorParams(flattenedPattern, min, max, cleanedPattern);

String generatedWithRgxGenerator = callGenerateTwice(StringGenerator::generateUsingRgxGenerator, generatorParams);
if (generatedWithRgxGenerator != null) {
return generatedWithRgxGenerator;
Optional<String> generatedWithInitialMinMax = callGeneratorsInOrder(generatorParams);
if (generatedWithInitialMinMax.isPresent()) {
return generatedWithInitialMinMax;
}

if (min == -1 && max == -1) {
GeneratorParams generatorParamsWithMinMax = new GeneratorParams(flattenedPattern, 0, 300, cleanedPattern);
return generateUsingRegexpGen(generatorParamsWithMinMax);
GeneratorParams generatorParamsWithMinMax = new GeneratorParams(flattenedPattern, 1, 300, cleanedPattern);

Optional<String> generatedWithAdjustedMinMax = callGeneratorsInOrder(generatorParamsWithMinMax);
if (generatedWithAdjustedMinMax.isPresent()) {
return generatedWithAdjustedMinMax;
}
}

String generatedUsingCatsRegexGenerator = callGenerateTwice(StringGenerator::generateUsingCatsRegexGenerator, generatorParams);
if (generatedUsingCatsRegexGenerator != null) {
return generatedUsingCatsRegexGenerator;
return Optional.empty();
}

private static Optional<String> callGeneratorsInOrder(GeneratorParams generatorParams) {
String rgxGeneratedWithMinMax = callGenerateTwice(StringGenerator::generateUsingRgxGenerator, generatorParams);
if (rgxGeneratedWithMinMax != null) {
return Optional.of(rgxGeneratedWithMinMax);
}

String generateUsingRegexpGen = callGenerateTwice(StringGenerator::generateUsingRegexpGen, generatorParams);
if (generateUsingRegexpGen != null) {
return generateUsingRegexpGen;
String generatedWithCatsRegexGenerator = callGenerateTwice(StringGenerator::generateUsingCatsRegexGenerator, generatorParams);
if (generatedWithCatsRegexGenerator != null) {
return Optional.of(generatedWithCatsRegexGenerator);
}

throw new IllegalArgumentException("Could not generate a string for pattern " + pattern + " with min " + min + " and max " + max);
String generatedWithRegexpGen = callGenerateTwice(StringGenerator::generateUsingRegexpGen, generatorParams);
if (generatedWithRegexpGen != null) {
return Optional.of(generatedWithRegexpGen);
}
return Optional.empty();
}

public static String tryGenerateWithSimpleRegexes(String originalPattern, int min, int max, String cleanedPattern) {
Expand All @@ -239,7 +269,10 @@ public static String callGenerateTwice(Function<GeneratorParams, String> generat
LOGGER.debug("Generator {} failed #atempt 1", generator.getClass().getSimpleName());
}
try {
String secondVersion = generator.apply(new GeneratorParams(removeLookaheadAssertions(generatorParams.cleanedPattern()), generatorParams.min, generatorParams.max, generatorParams.originalPattern()));
String patternWithLookaheadsRemoved = removeLookaheadAssertions(generatorParams.cleanedPattern());
LOGGER.debug("Pattern with lookaheads removed {}", patternWithLookaheadsRemoved);

String secondVersion = generator.apply(new GeneratorParams(patternWithLookaheadsRemoved, generatorParams.min, generatorParams.max, generatorParams.originalPattern()));
if (secondVersion.matches(generatorParams.originalPattern())) {
LOGGER.debug("Generated value with lookaheads removed " + secondVersion + " matched " + generatorParams.originalPattern());
return secondVersion;
Expand Down Expand Up @@ -604,7 +637,7 @@ public static List<String> getUnsupportedMediaTypes() {
*/
public static String removeLookaheadAssertions(String regex) {
regex = regex.replaceAll("\\(\\?=([^)]*)\\)", "($1)");
regex = regex.replaceAll("\\(\\?!([^)]*)\\)", "(^$1)");
regex = regex.replaceAll("\\(\\?!([^)]*)\\)", "");
regex = regex.replaceAll("\\(\\?<=([^)]*)\\)", "($1)");
regex = regex.replaceAll("\\(\\?<!([^)]*)\\)", "(^$1)");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ void shouldGenerateComplexEmailRegex() {
"^([0-9]{1,3}\\.){3}[0-9]{1,3}(:[0-9]{1,5})?$;^(\\d{1,3}\\.){3}\\d{1,3}(:\\d{1,5})?$",
"^([0-9]{4}-[0-9]{2}-[0-9]{2}[\\s\\t\\r\\n\\f]{0,1}[0-9]{2}:[0-9]{2}:[0-9]{2})\\s\\[([^\\]]+)\\]\\s(.*)$;^(\\d{4}-\\d{2}-\\d{2}\\s?\\d{2}:\\d{2}:\\d{2})\\s\\[([^\\]]+)\\]\\s(.*)$",
"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|3(?:0[0-5]|[68][0-9])[0-9]{11}|6(?:011|5[0-9]{2})[0-9]{12})$;^(?:4\\d{12}(?:\\d{3})?|5[1-5]\\d{14}|3[47]\\d{13}|3(?:0[0-5]|[68]\\d)\\d{11}|6(?:011|5\\d{2})\\d{12})$",
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*;(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$)\\w*", ".*;.*", "^(?!\\s*$).+;^(?!\\s*$).+",
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*;(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$)", ".*;.*", "^(?!\\s*$).+;^(?!\\s*$).+",
"^[^\\u0000-\\u00FF]+$;^[\\u0100-\\uFFFF]+$"},
delimiter = ';')
void shouldFlatten(String regex, String expected) {
Expand Down Expand Up @@ -256,22 +256,41 @@ void shouldGenerateWithInnerDollarSign() {

@ParameterizedTest
@CsvSource(value = {
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; 1; 6000",
"'^(?!\\s*$).+'; 1; 128",
"'^[^\\u0000-\\u00FF]$'; -1; -1",
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; 47; 562",
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; 1; 6000",
"[^\\r\\n]; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$;2;100",
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; 1; 256",
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$;1;200",
"^$|/.*;1;1024", "https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?;1;1024"
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; 1; 6000; 1; 6000",
"'(^$)|^(((\\+|00)(9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)([[:space:]]?))?([\\d]{4}\\d{1,9})$)'; -1; -1; 1; 6000",
"'^(?!\\s*$).+'; 1; 128; 1; 128",
"'^(?!\\s*$).+'; -1; -1; 1; 128",
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; 47; 562; 47; 562",
"'arn:aws:logs:[a-z\\-0-9]*:[0-9]{12}:log-group:([\\.\\-_/#A-Za-z0-9]+):\\*$'; -1; -1; 1; 562",
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; 1; 6000; 1; 6000",
".*(^arn:((aws)|(aws-cn)|(aws-us-gov)):s3:::)([a-zA-Z0-9_-]+$).*; -1; -1; 1; 6000",
"[^\\r\\n]; 1; 1; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$; 2; 100; 2; 100",
"[^\\r\\n]; -1; -1; 1; 1", "^arn:[^:]{1,63}:ec2:[^:]{0,63}:[^:]{0,63}:subnet\\/subnet-[0-9a-f]{8,17}$|^$; -1; -1; 2; 300",
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; 1; 512; 1; 512",
"(?=^.{8,64}$)((?=.*\\d)(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[^A-Za-z0-9\\s])(?=.*[a-z])|(?=.*[^A-Za-z0-9\\s])(?=.*[A-Z])(?=.*[a-z])|(?=.*\\d)(?=.*[A-Z])(?=.*[^A-Za-z0-9\\s]))^.*; -1; -1; 1; 512",
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$; 1; 200; 1; 200",
"^urn:tdm:(([a-z]{2}-(gov-)?[a-z]{4,9}-[0-9]{1,3}/[0-9]+/)*[\\p{Alnum}_]+(/[\\p{Alnum}_]+)*):([\\p{Alpha}]*):([\\p{Alnum}_]+(/[\\p{Alnum}_]+)*)$; -1; -1; 1; 200",
"^$|/.*; 1; 1024; 1; 1024",
"^$|/.*; -1; -1; 1; 1024",
"https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?; 1; 1024; 1; 1024",
"https://[a-zA-Z0-9-.]*\\.amazon(aws)?\\.com[/]?; -1; -1; 1; 1024",
"^\\s*<(.|\\n)*SignalProcessingNotification(.|\\n)*>\\s*$; 1; 256; 1; 256",
"^\\s*<(.|\\n)*SignalProcessingNotification(.|\\n)*>\\s*$; -1; -1; 1; 300",
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$; 1; 20; 1; 20",
"^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s]((([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)([\\.,]\\d+(?!:))?)?(\\17[0-5]\\d([\\.,]\\d+)?)?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$; -1; -1; 1; 20",
".*(^TVA([1-9][0-9]?|1[0-9]{2})$).*; 1; 10; 1; 10",
".*(^TVA([1-9][0-9]?|1[0-9]{2})$).*; -1; -1; 1; 10"
}, delimiter = ';')
void shouldGenerateRegex(String pattern, int minSize, int maxSize) {
void shouldGenerateRegex(String pattern, int minSize, int maxSize, int expectedMinSize, int expectedMaxSize) {
String generated = StringGenerator.generate(pattern, minSize, maxSize);
if (minSize == -1 && maxSize == -1) {
Assertions.assertThat(generated).hasSize(1).matches(pattern);
} else {
Assertions.assertThat(generated).hasSizeBetween(minSize, maxSize).matches(pattern);
}

Assertions.assertThat(generated).hasSizeBetween(expectedMinSize, expectedMaxSize).matches(pattern);
}

/**
 * Checks that {@code StringGenerator.generate} called with -1 for both the min
 * and max arguments returns a one-character string for a pattern that is
 * anchored around a single negated character class, and that the returned
 * value matches that same pattern.
 */
@Test
void shouldHaveSizeOne() {
    final String singleCharRegex = "^[^\\u0000-\\u00FF]$";

    final String result = StringGenerator.generate(singleCharRegex, -1, -1);

    Assertions.assertThat(result)
            .hasSize(1)
            .matches(singleCharRegex);
}
}

0 comments on commit 5b67a25

Please sign in to comment.