fix header column collation with filename

CountESS-Project · Apr 24, 2024 · c6bae2c
1 parent ac03b69
commit c6bae2c
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/countess/plugins/fastq.py b/countess/plugins/fastq.py
@@ -47,16 +47,17 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None):
 
         group_columns = ["sequence"]
 
-        if self.parameters["header_column"].value:
+        if not self.parameters["header_column"].value:
+            dataframe.drop(columns="header", inplace=True)
+        elif self.parameters["group"].value:
+            # if we've got a header column and we're grouping by sequence,
             # find maximum common length of the 'header' field in this file
             for common_length in range(0, dataframe["header"].str.len().min() - 1):
                 if dataframe["header"].str.slice(0, common_length + 1).nunique() > 1:
                     break
             if common_length > 0:
                 dataframe["header"] = dataframe["header"].str.slice(0, common_length)
                 group_columns.append("header")
-        else:
-            dataframe.drop(columns="header", inplace=True)
 
         if self.parameters["filename_column"].value:
             group_columns.append("filename")