
Commit

code works
Xu, Qinying (H&B, Herston) authored and Xu, Qinying (H&B, Herston) committed Apr 4, 2024
1 parent 7b6df96 commit da88fa9
Showing 2 changed files with 7 additions and 8 deletions.
11 changes: 5 additions & 6 deletions src/main/java/au/csiro/variantspark/utils/FileUtils.java
Original file line number Diff line number Diff line change
@@ -12,23 +12,22 @@ public class FileUtils {
* @param file: an input file
* @return true if the input file is BGZIP, determined by checking the first two bytes of the input file
*/
- public static boolean isInputBGZ(final File file) {
-
+ public static boolean isBGZFile(String filePath) {
/**
- * .vcf.bgz is a type of GZIP file
+ * .vcf.bgz is a type of GZIP file and works well with BlockCompressedInputStream
* .vcf.gz is also a GZIP file but gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
* .vcf.bz2 is not a GZIP file and gets java.lang.OutOfMemoryError at java.io.InputStreamReader.read(InputStreamReader.java:184)
* .vcf is not a GZIP file and gets htsjdk.samtools.SAMFormatException: at header from java.io.BufferedReader.readLine(BufferedReader.java:389)
- */
- try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(file))) {
+ */
+ try (BufferedInputStream bufferedInputStream = new BufferedInputStream(new FileInputStream(filePath))) {
bufferedInputStream.mark(100); // mark the current position
boolean isValid = BlockCompressedInputStream.isValidFile(bufferedInputStream);
bufferedInputStream.reset(); // reset back to the marked position
return isValid;
} catch (IOException e) {
// Handle the exception
return false;
}
}
}

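For reference, the renamed helper takes a plain path string rather than a java.io.File, so it can be called with the same string that is later handed to Spark. Below is a minimal Scala sketch of how the check might be exercised; the file names are hypothetical examples matching the extensions discussed in the comment above, not files from the repository:

    import au.csiro.variantspark.utils.FileUtils

    object BgzCheckExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical local files; per the comment above, only the .vcf.bgz
        // variant is block-compressed and should return true.
        val candidates = Seq("sample.vcf.bgz", "sample.vcf.gz", "sample.vcf.bz2", "sample.vcf")
        candidates.foreach { path =>
          println(s"$path -> isBGZFile = ${FileUtils.isBGZFile(path)}")
        }
      }
    }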
4 changes: 2 additions & 2 deletions src/main/scala/au/csiro/variantspark/cli/args/SparkArgs.scala
@@ -15,9 +15,9 @@ trait SparkArgs extends SparkApp {
val sparkPar: Int = 0

def textFile(inputFile: String): RDD[String] = {
- val isBGZ = FileUtils.isInputBGZ(new File(inputFile))
+ val isBGZ = FileUtils.isBGZFile(inputFile)
println(inputFile + " is loading to spark RDD, isBGZFile: " + isBGZ)
- if (isBGZ ) {
+ if (isBGZ) {
val path = new Path(inputFile)
val fs = path.getFileSystem(sc.hadoopConfiguration)
val bgzInputStream = new BlockCompressedInputStream(fs.open(path))
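The remainder of textFile is not shown in this hunk. As a rough illustration of the technique the BGZ branch relies on, here is one way a BlockCompressedInputStream can be turned into an RDD[String]; this is a sketch under the assumption that the whole file fits on the driver, and not necessarily the code hidden below the fold:

    import htsjdk.samtools.util.BlockCompressedInputStream
    import org.apache.hadoop.fs.Path
    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import scala.io.Source

    // Sketch only: decompress on the driver and parallelise the lines.
    // Fine for small inputs; a large VCF would need a streaming approach.
    def bgzTextFile(sc: SparkContext, inputFile: String): RDD[String] = {
      val path = new Path(inputFile)
      val fs = path.getFileSystem(sc.hadoopConfiguration)
      val bgzInputStream = new BlockCompressedInputStream(fs.open(path))
      val lines = Source.fromInputStream(bgzInputStream).getLines().toList
      sc.parallelize(lines)
    }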
