From 0b0c2264d0a29acac86d8d350ef1ea8e904bf434 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 10 Sep 2024 20:09:23 +0200 Subject: [PATCH] Use a safer way to uncompress zip files --- .../crawler/fs/framework/FsCrawlerUtil.java | 29 --------------- .../fs/test/integration/AbstractITCase.java | 36 ++++++++++++++++--- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java b/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java index e04f47984..56318c97a 100644 --- a/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java +++ b/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java @@ -26,7 +26,6 @@ import org.apache.logging.log4j.Logger; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.CopyOption; @@ -48,8 +47,6 @@ import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.*; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; import java.util.regex.Pattern; public class FsCrawlerUtil { @@ -541,32 +538,6 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO } } - /** - * Unzip a jar file - * @param jarFile Jar file url like /path/to/foo.jar - * @param destination Directory where we want to extract the content to - * @throws IOException In case of any IO problem - */ - public static void unzip(String jarFile, Path destination) throws IOException { - try (JarFile jar = new JarFile(jarFile)) { - Enumeration<JarEntry> enumEntries = jar.entries(); - while (enumEntries.hasMoreElements()) { - JarEntry file = enumEntries.nextElement(); - File f = new File(destination + File.separator + file.getName()); - if (file.isDirectory()) { - f.mkdir(); - continue; - } - - try (InputStream is = jar.getInputStream(file); FileOutputStream fos = new 
FileOutputStream(f)) { - while (is.available() > 0) { - fos.write(is.read()); - } - } - } - } - } - public static boolean isNullOrEmpty(String string) { return string == null || string.isEmpty(); } diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java index c01078555..c52a817a6 100644 --- a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java @@ -34,6 +34,7 @@ import fr.pilato.elasticsearch.crawler.fs.settings.ServerUrl; import fr.pilato.elasticsearch.crawler.fs.test.framework.AbstractFSCrawlerTestCase; import jakarta.ws.rs.ProcessingException; +import org.apache.commons.io.IOUtils; import org.apache.logging.log4j.Level; import org.hamcrest.Matcher; import org.junit.AfterClass; @@ -41,10 +42,10 @@ import org.junit.BeforeClass; import javax.net.ssl.SSLException; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.net.SocketException; import java.net.URL; +import java.nio.charset.Charset; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.Path; @@ -52,9 +53,12 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.Collections; +import java.util.Enumeration; import java.util.Locale; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLength; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; @@ -213,9 +217,7 @@ private static void copyTestDocumentsToTargetDir(Path target, String sourceDirNa String jarFileWithProtocol = 
fileInJar.substring(0, i); // We remove the "file:" protocol String jarFile = jarFileWithProtocol.substring("file:".length()); - - staticLogger.info("-> Unzipping test documents from [{}] to [{}]", jarFile, target); - unzip(jarFile, target); + unzip(Path.of(jarFile), target, Charset.defaultCharset()); break; } default : @@ -224,6 +226,30 @@ private static void copyTestDocumentsToTargetDir(Path target, String sourceDirNa } } + private static void unzip(Path zip, Path outputFolder, Charset charset) throws IOException { + staticLogger.info("-> Unzipping test documents from [{}] to [{}]", zip, outputFolder); + + try (ZipFile zipFile = new ZipFile(zip.toFile(), ZipFile.OPEN_READ, charset)) { + Enumeration<? extends ZipEntry> entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + Path entryPath = outputFolder.resolve(entry.getName()); + if (entryPath.normalize().startsWith(outputFolder.normalize())) { + if (entry.isDirectory()) { + Files.createDirectories(entryPath); + } else { + Files.createDirectories(entryPath.getParent()); + try (InputStream in = zipFile.getInputStream(entry)) { + try (OutputStream out = new FileOutputStream(entryPath.toFile())) { + IOUtils.copy(in, out); + } + } + } + } + } + } + } + @BeforeClass public static void startServices() throws IOException, ElasticsearchClientException { if (testClusterUrl == null) {