-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AJ-1517 download files only once #448
Changes from 20 commits
6ba24d4
d3a173c
c8f2e2f
04b1ba4
12d1310
feb9021
7af31b8
6929e4c
b00b902
4e6c17b
c938683
7122290
bbcfcd9
daaa3c1
f5b2fae
7db74d6
421bb39
e7554a5
c29481f
3b1816d
51e0682
3e2e841
31de46d
140d817
16daf59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
package org.databiosphere.workspacedataservice.dataimport; | ||
|
||
import com.google.common.collect.HashMultimap; | ||
import com.google.common.collect.Multimap; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.attribute.PosixFilePermission; | ||
import java.util.EnumSet; | ||
import java.util.Set; | ||
import org.apache.commons.io.FileUtils; | ||
import org.databiosphere.workspacedataservice.service.model.exception.TdrManifestImportException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.retry.annotation.Backoff; | ||
import org.springframework.retry.annotation.Retryable; | ||
|
||
public class FileDownloadHelper { | ||
|
||
private final DownloadHelper downloadHelper; | ||
|
||
public interface DownloadHelper { | ||
default void copyURLToFile(URL sourceUrl, File destinationFile) throws IOException { | ||
{ | ||
FileUtils.copyURLToFile(sourceUrl, destinationFile); | ||
} | ||
} | ||
} | ||
|
||
private final Logger logger = LoggerFactory.getLogger(this.getClass()); | ||
private final Path tempFileDir; | ||
private final Multimap<String, File> fileMap; | ||
private final Set<PosixFilePermission> permissions = | ||
EnumSet.of( | ||
PosixFilePermission.OWNER_READ, | ||
PosixFilePermission.GROUP_READ, | ||
PosixFilePermission.OTHERS_READ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could this be locked down even further, to just |
||
|
||
public FileDownloadHelper(String dirName, DownloadHelper downloadHelper) throws IOException { | ||
this.tempFileDir = Files.createTempDirectory(dirName); | ||
this.downloadHelper = downloadHelper; | ||
this.fileMap = HashMultimap.create(); | ||
} | ||
|
||
public FileDownloadHelper(String dirName) throws IOException { | ||
this( | ||
dirName, | ||
new DownloadHelper() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this doesn't need the anonymous sublcass & override anymore since the default impl does the right thing. Just pass in |
||
@Override | ||
public void copyURLToFile(URL sourceUrl, File destinationFile) throws IOException { | ||
DownloadHelper.super.copyURLToFile(sourceUrl, destinationFile); | ||
} | ||
}); | ||
} | ||
|
||
@Retryable(maxAttempts = 3, backoff = @Backoff(delay = 1000)) | ||
public void downloadFileFromURL(String tableName, URL pathToRemoteFile) { | ||
try { | ||
File tempFile = | ||
File.createTempFile(/* prefix= */ "tdr-", /* suffix= */ "download", tempFileDir.toFile()); | ||
logger.info("downloading to temp file {} ...", tempFile.getPath()); | ||
downloadHelper.copyURLToFile(pathToRemoteFile, tempFile); | ||
// In the TDR manifest, for Azure snapshots only, | ||
// the first file in the list will always be a directory. | ||
// Attempting to import that directory | ||
// will fail; it has no content. To avoid those failures, | ||
// check files for length and ignore any that are empty | ||
if (tempFile.length() == 0) { | ||
logger.info("Empty file in parquet, skipping"); | ||
Files.delete(tempFile.toPath()); | ||
} else { | ||
// Once the remote file has been copied to the temp file, make it read-only | ||
Files.setPosixFilePermissions(tempFile.toPath(), permissions); | ||
fileMap.put(tableName, tempFile); | ||
} | ||
} catch (IOException e) { | ||
throw new TdrManifestImportException(e.getMessage(), e); | ||
} | ||
} | ||
|
||
public void deleteFileDirectory() { | ||
try { | ||
Files.delete(tempFileDir); | ||
} catch (IOException e) { | ||
logger.error("Error deleting temporary files: {}", e.getMessage()); | ||
} | ||
} | ||
|
||
public Multimap<String, File> getFileMap() { | ||
return this.fileMap; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package org.databiosphere.workspacedataservice.dataimport; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.Mockito.doThrow; | ||
import static org.mockito.Mockito.times; | ||
import static org.mockito.Mockito.verify; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URL; | ||
import org.junit.jupiter.api.Test; | ||
import org.mockito.Mockito; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.boot.test.context.SpringBootTest; | ||
import org.springframework.core.io.Resource; | ||
import org.springframework.retry.backoff.FixedBackOffPolicy; | ||
import org.springframework.retry.support.RetryTemplate; | ||
|
||
@SpringBootTest | ||
public class FileDownloadHelperTest { | ||
|
||
@Value("classpath:parquet/empty.parquet") | ||
Resource emptyParquet; | ||
|
||
@Value("classpath:parquet/v2f/all_data_types.parquet") | ||
Resource allDataTypesParquet; | ||
|
||
@Test | ||
void downloadEmptyFile() throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is a duplicate of the test in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's worth testing both |
||
FileDownloadHelper helper = new FileDownloadHelper("test"); | ||
assertDoesNotThrow(() -> helper.downloadFileFromURL("empty_table", emptyParquet.getURL())); | ||
assert helper.getFileMap().isEmpty(); | ||
} | ||
|
||
@Test | ||
void testRetry() throws Exception { | ||
FileDownloadHelper.DownloadHelper mockDownloadHelper = | ||
Mockito.mock(FileDownloadHelper.DownloadHelper.class); | ||
doThrow(new IOException("Simulated connection error")) | ||
.doCallRealMethod() // Succeed on the second attempt | ||
.when(mockDownloadHelper) | ||
.copyURLToFile(any(URL.class), any(File.class)); | ||
|
||
// Create a RetryTemplate to set off Spring's retryable | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Spring's There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that you need to include a I am pretty certain the reason that
there are probably other ways to do it … ( I could pair on this/contribute code if my explanations don't make sense) |
||
RetryTemplate retryTemplate = new RetryTemplate(); | ||
retryTemplate.setBackOffPolicy(new FixedBackOffPolicy()); | ||
|
||
FileDownloadHelper helper = new FileDownloadHelper("test", mockDownloadHelper); | ||
|
||
// A single connectivity error should not throw | ||
assertDoesNotThrow( | ||
() -> | ||
retryTemplate.execute( | ||
context -> { | ||
helper.downloadFileFromURL("table", allDataTypesParquet.getURL()); | ||
return null; | ||
})); | ||
|
||
// Make sure there actually was a connectivity problem | ||
verify(mockDownloadHelper, times(2)).copyURLToFile(any(URL.class), any(File.class)); | ||
|
||
// File should successfully download on second attempt | ||
assert helper.getFileMap().containsKey("table"); | ||
assert helper.getFileMap().get("table").size() == 1; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needed this method to not be abstract in order to mock it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very weird, I wasn't expecting that