-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AJ-1517 download files only once #448
Changes from 22 commits
6ba24d4
d3a173c
c8f2e2f
04b1ba4
12d1310
feb9021
7af31b8
6929e4c
b00b902
4e6c17b
c938683
7122290
bbcfcd9
daaa3c1
f5b2fae
7db74d6
421bb39
e7554a5
c29481f
3b1816d
51e0682
3e2e841
31de46d
140d817
16daf59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package org.databiosphere.workspacedataservice.dataimport; | ||
|
||
import com.google.common.collect.HashMultimap; | ||
import com.google.common.collect.Multimap; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.attribute.PosixFilePermission; | ||
import java.util.EnumSet; | ||
import java.util.Set; | ||
import org.apache.commons.io.FileUtils; | ||
import org.databiosphere.workspacedataservice.service.model.exception.TdrManifestImportException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
public class FileDownloadHelper { | ||
|
||
private final Logger logger = LoggerFactory.getLogger(this.getClass()); | ||
private final Path tempFileDir; | ||
private final Multimap<String, File> fileMap; | ||
private final Set<PosixFilePermission> permissions = EnumSet.of(PosixFilePermission.OWNER_READ); | ||
|
||
public FileDownloadHelper(String dirName) throws IOException { | ||
this.tempFileDir = Files.createTempDirectory(dirName); | ||
this.fileMap = HashMultimap.create(); | ||
} | ||
|
||
public void downloadFileFromURL(String tableName, URL pathToRemoteFile) { | ||
try { | ||
File tempFile = File.createTempFile(/* prefix= */ "tdr-", /* suffix= */ "download"); | ||
logger.info("downloading to temp file {} ...", tempFile.getPath()); | ||
FileUtils.copyURLToFile(pathToRemoteFile, tempFile); | ||
// In the TDR manifest, for Azure snapshots only, | ||
// the first file in the list will always be a directory. | ||
// Attempting to import that directory | ||
// will fail; it has no content. To avoid those failures, | ||
// check files for length and ignore any that are empty | ||
if (tempFile.length() == 0) { | ||
logger.info("Empty file in parquet, skipping"); | ||
Files.delete(tempFile.toPath()); | ||
} else { | ||
// Once the remote file has been copied to the temp file, make it read-only | ||
Files.setPosixFilePermissions(tempFile.toPath(), permissions); | ||
fileMap.put(tableName, tempFile); | ||
} | ||
} catch (IOException e) { | ||
throw new TdrManifestImportException(e.getMessage(), e); | ||
} | ||
} | ||
|
||
public void deleteFileDirectory() { | ||
try { | ||
Files.delete(tempFileDir); | ||
} catch (IOException e) { | ||
logger.error("Error deleting temporary files: {}", e.getMessage()); | ||
} | ||
} | ||
|
||
public Multimap<String, File> getFileMap() { | ||
return fileMap; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package org.databiosphere.workspacedataservice.dataimport; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; | ||
|
||
import java.io.IOException; | ||
import org.junit.jupiter.api.Test; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.boot.test.context.SpringBootTest; | ||
import org.springframework.core.io.Resource; | ||
|
||
@SpringBootTest | ||
public class FileDownloadHelperTest { | ||
|
||
@Value("classpath:parquet/empty.parquet") | ||
Resource emptyParquet; | ||
|
||
@Test | ||
void downloadEmptyFile() throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test is a duplicate of the test in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's worth testing both |
||
FileDownloadHelper helper = new FileDownloadHelper("test"); | ||
assertDoesNotThrow(() -> helper.downloadFileFromURL("empty_table", emptyParquet.getURL())); | ||
assert helper.getFileMap().isEmpty(); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this class should be responsible for creating the temp directory and passing the Path to that directory down through `getFilesForImport`.
Then, later on, when this class wants to delete all the downloaded files, it can just delete the directory it already has a reference to.