From ed8db8c22019cb1ffe5bf89794c565d94d03445c Mon Sep 17 00:00:00 2001 From: Josh Ladieu <111856+jladieu@users.noreply.github.com> Date: Tue, 7 May 2024 12:11:27 -0400 Subject: [PATCH] AJ-1812: Require gs:// URIs to match workspaceId (#775) This is a security measure to ensure that a bad actor can't craft a `RAWLSJSON` URI that hijacks a request for a different workspace. Paired with orchestration change: https://github.com/broadinstitute/firecloud-orchestration/commit/16074dd555261e683651c3dfac280912577d49e9 --- .../dataimport/DefaultImportValidator.java | 27 +++++++++-- .../DefaultImportValidatorTest.java | 48 +++++++++++++++---- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/service/src/main/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidator.java b/service/src/main/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidator.java index fefee568b..2214ae6e0 100644 --- a/service/src/main/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidator.java +++ b/service/src/main/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidator.java @@ -2,6 +2,7 @@ import static java.util.Collections.emptyList; import static java.util.Collections.emptySet; +import static java.util.regex.Pattern.compile; import bio.terra.workspace.model.WorkspaceDescription; import bio.terra.workspace.model.WsmPolicyInput; @@ -13,6 +14,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.databiosphere.workspacedataservice.config.DataImportProperties.ImportSourceConfig; import org.databiosphere.workspacedataservice.generated.ImportRequestServerModel; @@ -35,12 +37,12 @@ public class DefaultImportValidator implements ImportValidator { TypeEnum.TDRMANIFEST, Set.of(SCHEME_HTTPS)); private static final Set ALWAYS_ALLOWED_HOSTS = Set.of( - Pattern.compile("storage\\.googleapis\\.com"), - Pattern.compile(".*\\.core\\.windows\\.net"), + compile("storage\\.googleapis\\.com"), + compile(".*\\.core\\.windows\\.net"), // S3 allows multiple URL formats // https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html - Pattern.compile("s3\\.amazonaws\\.com"), // path style legacy global endpoint - Pattern.compile(".*\\.s3\\.amazonaws\\.com") // virtual host style legacy global endpoint + compile("s3\\.amazonaws\\.com"), // path style legacy global endpoint + compile(".*\\.s3\\.amazonaws\\.com") // virtual host style legacy global endpoint ); private final Map> allowedHostsByScheme; private final ImportRequirementsFactory importRequirementsFactory; @@ -56,7 +58,7 @@ public DefaultImportValidator( .put(SCHEME_HTTPS, Sets.union(ALWAYS_ALLOWED_HOSTS, allowedHttpsHosts)); if (StringUtils.isNotBlank(allowedRawlsBucket)) { - allowedHostsBuilder.put(SCHEME_GS, Set.of(Pattern.compile(allowedRawlsBucket))); + allowedHostsBuilder.put(SCHEME_GS, Set.of(compile(allowedRawlsBucket))); } else { allowedHostsBuilder.put(SCHEME_GS, emptySet()); } @@ -88,9 +90,24 @@ public void validateImport( "Files may not be imported from %s.".formatted(importUrl.getHost())); } + if (importType == TypeEnum.RAWLSJSON) { + validatePathBelongsToWorkspace(importRequest.getUrl().getPath(), destinationWorkspaceId); + } validateDestinationWorkspace(importRequest, destinationWorkspaceId); } + private static final String URI_TEMPLATE = "^/to-cwds/%s/.*\\.json$"; + + private void validatePathBelongsToWorkspace(String path, WorkspaceId workspaceId) { + Pattern expectedPattern = compile(URI_TEMPLATE.formatted(workspaceId.toString())); + Matcher matcher = expectedPattern.matcher(path); + + if (!matcher.matches()) { + throw new ValidationException( + "Expected URI with format %s".formatted(expectedPattern.toString())); + } + } + private void validateDestinationWorkspace( ImportRequestServerModel importRequest, WorkspaceId destinationWorkspaceId) { ImportRequirements requirements = diff --git a/service/src/test/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidatorTest.java b/service/src/test/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidatorTest.java index ac79faeb8..2fefa0603 100644 --- a/service/src/test/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidatorTest.java +++ b/service/src/test/java/org/databiosphere/workspacedataservice/dataimport/DefaultImportValidatorTest.java @@ -1,5 +1,6 @@ package org.databiosphere.workspacedataservice.dataimport; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatExceptionOfType; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -128,9 +129,10 @@ void rejectsImportsFromOtherSources() { } @Test - void acceptsGsUrlsForRawlsJsonImports() { + void acceptsGsUrlsWithMatchingWorkspaceIdForRawlsJsonImports() { // Arrange - URI importUri = URI.create("gs://test-bucket/file"); + URI importUri = + URI.create("gs://test-bucket/to-cwds/%s/random.json".formatted(destinationWorkspaceId)); ImportRequestServerModel importRequest = new ImportRequestServerModel(TypeEnum.RAWLSJSON, importUri); @@ -138,19 +140,49 @@ void acceptsGsUrlsForRawlsJsonImports() { assertDoesNotThrow(() -> importValidator.validateImport(importRequest, destinationWorkspaceId)); } + @Test + void rejectsGsUrlsWithCorrectBucketAndWorkspaceButUnexpectedFormat() { + // Arrange + URI importUri = + URI.create("gs://test-bucket/random/%s/random.json".formatted(destinationWorkspaceId)); + ImportRequestServerModel importRequest = + new ImportRequestServerModel(TypeEnum.RAWLSJSON, importUri); + + // Act/Assert + assertThatExceptionOfType(ValidationException.class) + .isThrownBy(() -> importValidator.validateImport(importRequest, destinationWorkspaceId)) + .withMessageContaining("/to-cwds/%s/".formatted(destinationWorkspaceId)); + } + @Test void rejectGsUrlsWithMismatchingBucketForJsonImports() { // Arrange - URI importUri = URI.create("gs://rando-bucket/file"); + URI importUri = + URI.create("gs://rando-bucket/to-cwds/%s/random.json".formatted(destinationWorkspaceId)); ImportRequestServerModel importRequest = new ImportRequestServerModel(TypeEnum.RAWLSJSON, importUri); // Act/Assert - ValidationException err = - assertThrows( - ValidationException.class, - () -> importValidator.validateImport(importRequest, destinationWorkspaceId)); - assertEquals("Files may not be imported from rando-bucket.", err.getMessage()); + assertThatExceptionOfType(ValidationException.class) + .isThrownBy( + () -> importValidator.validateImport(importRequest, WorkspaceId.of(UUID.randomUUID()))) + .withMessageContaining("Files may not be imported from rando-bucket."); + } + + @Test + void rejectsGsUrlsWithMismatchingWorkspaceId() { + // Arrange + UUID otherWorkspaceId = UUID.randomUUID(); + URI importUri = + URI.create( + "gs://test-bucket/to-cwds/%s/other-workspace-import.json".formatted(otherWorkspaceId)); + ImportRequestServerModel importRequest = + new ImportRequestServerModel(TypeEnum.RAWLSJSON, importUri); + + // Act/Assert + assertThatExceptionOfType(ValidationException.class) + .isThrownBy(() -> importValidator.validateImport(importRequest, destinationWorkspaceId)) + .withMessageContaining("/to-cwds/%s/".formatted(destinationWorkspaceId)); } @ParameterizedTest