Skip to content

Commit

Permalink
fix header column collation with filename
Browse files: browse the repository at this point in the history
  • Loading branch information
nickzoic committed Apr 24, 2024
1 parent ac03b69 commit c6bae2c
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions countess/plugins/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,17 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None):

group_columns = ["sequence"]

- if self.parameters["header_column"].value:
+ if not self.parameters["header_column"].value:
dataframe.drop(columns="header", inplace=True)
elif self.parameters["group"].value:
# if we've got a header column and we're grouping by sequence,
# find maximum common length of the 'header' field in this file
for common_length in range(0, dataframe["header"].str.len().min() - 1):
if dataframe["header"].str.slice(0, common_length + 1).nunique() > 1:
break
if common_length > 0:
dataframe["header"] = dataframe["header"].str.slice(0, common_length)
group_columns.append("header")
else:
dataframe.drop(columns="header", inplace=True)

if self.parameters["filename_column"].value:
group_columns.append("filename")
Expand Down

0 comments on commit c6bae2c

Please sign in to comment.