From 3780fc08503588019c1601e37ea9fe27ab2d3b28 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 15 Oct 2024 10:29:33 -0700 Subject: [PATCH 1/2] Run Nextclade on all 8 genes for H1 and H3 Adds remaining gene segments to the default list of segments to process with Nextclade. Since B/Vic only has HA and NA Nextclade datasets right now, this commit modifies the logic of the workflow to look for a build-specific list of segments and defines that list for the Vic build. --- profiles/nextclade.yaml | 9 +++++++++ profiles/nextclade/run-nextclade.smk | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/profiles/nextclade.yaml b/profiles/nextclade.yaml index 44346bc3..0396cb18 100644 --- a/profiles/nextclade.yaml +++ b/profiles/nextclade.yaml @@ -7,6 +7,12 @@ s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu" segments: - ha - na + - pb2 + - pb1 + - pa + - np + - mp + - ns builds: h1n1pdm: @@ -15,3 +21,6 @@ builds: lineage: h3n2 vic: lineage: vic + segments: + - ha + - na diff --git a/profiles/nextclade/run-nextclade.smk b/profiles/nextclade/run-nextclade.smk index d4a8c41e..e1a4445e 100644 --- a/profiles/nextclade/run-nextclade.smk +++ b/profiles/nextclade/run-nextclade.smk @@ -4,7 +4,7 @@ rule upload_all_nextclade_files: "data/upload/s3/{filetype}_{lineage}_{segment}.done".format(filetype=filetype, lineage=build["lineage"], segment=segment) for filetype in ("alignment", "nextclade") for build in config["builds"].values() - for segment in config["segments"] + for segment in build.get("segments", config["segments"]) ] rule get_nextclade_dataset_for_lineage_and_segment: From 582236f7b752065430c972286f42b1e791897da1 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 15 Oct 2024 10:44:25 -0700 Subject: [PATCH 2/2] Use full Nextclade names for all gene segments Not all gene segments define shorter aliases (e.g., "flu_h3n2_ha"), so we need to use the full names (e.g., "nextstrain/flu/h3n2/ha") to download each dataset. 
--- profiles/nextclade/run-nextclade.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiles/nextclade/run-nextclade.smk b/profiles/nextclade/run-nextclade.smk index e1a4445e..4adfdebb 100644 --- a/profiles/nextclade/run-nextclade.smk +++ b/profiles/nextclade/run-nextclade.smk @@ -13,7 +13,7 @@ rule get_nextclade_dataset_for_lineage_and_segment: shell: """ nextclade3 dataset get \ - -n flu_{wildcards.lineage}_{wildcards.segment} \ + -n 'nextstrain/flu/{wildcards.lineage}/{wildcards.segment}' \ --output-dir {output.nextclade_dir} """