From 9d4597ecd41d1717064130e2be120911627a9d20 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 10 Sep 2024 18:50:49 -0700 Subject: [PATCH 1/5] Add emerging subclades to nextflu-private builds Adds rules and configs to annotate emerging subclades to nextflu-private builds, paving the way for any builds that define the `emerging_subclades` path in their config to use the same clade definitions. Most of these emerging subclades map directly to the main branch in the corresponding influenza nomenclature repos. H3N2 HA is an exception where the definitions come from a branch called `emerging`. --- Snakefile | 15 +++++ profiles/nextflu-private.yaml | 3 + .../h1n1pdm/ha/auspice_config.json | 63 +++++++++++++++++++ .../h3n2/ha/auspice_config.json | 41 +++++++++++- .../vic/ha/auspice_config.json | 35 +++++++++++ workflow/snakemake_rules/core.smk | 37 +++++++++++ workflow/snakemake_rules/export.smk | 3 + 7 files changed, 196 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index ab19f5e8..a7bfa7eb 100644 --- a/Snakefile +++ b/Snakefile @@ -48,6 +48,21 @@ subclade_url_by_lineage_and_segment = { } } +emerging_subclade_url_by_lineage_and_segment = { + "h1n1pdm": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H1N1pdm_HA/main/.auto-generated/subclades.tsv", + "na": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H1N1pdm_NA/main/.auto-generated/subclades.tsv", + }, + "h3n2": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H3N2_HA/emerging/.auto-generated/subclades.tsv", + "na": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H3N2_NA/main/.auto-generated/subclades.tsv", + }, + "vic": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_B-Vic_HA/main/.auto-generated/subclades.tsv", + "na": 
"https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_B-Vic_NA/main/.auto-generated/subclades.tsv", + } +} + if "data_source" in config and config["data_source"]=='fauna': include: "workflow/snakemake_rules/download_from_fauna.smk" diff --git a/profiles/nextflu-private.yaml b/profiles/nextflu-private.yaml index 7fb770b0..4f4b10d0 100644 --- a/profiles/nextflu-private.yaml +++ b/profiles/nextflu-private.yaml @@ -56,6 +56,7 @@ builds: tree_exclude_sites: "config/h1n1pdm/{segment}/exclude-sites.txt" clades: "config/h1n1pdm/ha/clades.tsv" subclades: "config/h1n1pdm/{segment}/subclades.tsv" + emerging_subclades: "config/h1n1pdm/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/h1n1pdm/{segment}/auspice_config.json" min_date: "2Y" reference_min_date: "6Y" @@ -104,6 +105,7 @@ builds: tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt" clades: "config/h3n2/ha/clades.tsv" subclades: "config/h3n2/{segment}/subclades.tsv" + emerging_subclades: "config/h3n2/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/h3n2/{segment}/auspice_config.json" vaccines: "config/h3n2/vaccine.json" enable_glycosylation: true @@ -149,6 +151,7 @@ builds: tree_exclude_sites: "config/vic/{segment}/exclude-sites.txt" clades: "profiles/nextflu-private/vic/ha/clades.tsv" subclades: "config/vic/{segment}/subclades.tsv" + emerging_subclades: "config/vic/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/vic/{segment}/auspice_config.json" min_date: "2Y" reference_min_date: "6Y" diff --git a/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json b/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json index 4a2b7e13..005106c9 100644 --- a/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json +++ b/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json @@ -129,6 +129,69 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "C.1", + 
"#492AB5" + ], + [ + "C.1.1", + "#3F4CCB" + ], + [ + "C.1.2", + "#4271CE" + ], + [ + "C.1.5", + "#4C8FC0" + ], + [ + "C.1.7", + "#5AA5A8" + ], + [ + "C.1.7.1", + "#6DB38A" + ], + [ + "C.1.7.2", + "#85BA6F" + ], + [ + "C.1.8", + "#A0BE59" + ], + [ + "C.1.9", + "#BBBC49" + ], + [ + "D", + "#D2B340" + ], + [ + "D.1", + "#E19F3A" + ], + [ + "D.2", + "#E68033" + ], + [ + "D.3", + "#E2562B" + ], + [ + "D.4", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", diff --git a/profiles/nextflu-private/h3n2/ha/auspice_config.json b/profiles/nextflu-private/h3n2/ha/auspice_config.json index 46e1707c..84abe5c1 100644 --- a/profiles/nextflu-private/h3n2/ha/auspice_config.json +++ b/profiles/nextflu-private/h3n2/ha/auspice_config.json @@ -141,6 +141,45 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "J", + "#4068CF" + ], + [ + "J.1", + "#5098B9" + ], + [ + "J.1.1", + "#6CB28C" + ], + [ + "J.2", + "#94BD62" + ], + [ + "J.2.1", + "#BFBB47" + ], + [ + "J.2.2", + "#DFA53B" + ], + [ + "J.3", + "#E67131" + ], + [ + "J.4", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", @@ -555,4 +594,4 @@ "entropy", "frequencies" ] -} +} \ No newline at end of file diff --git a/profiles/nextflu-private/vic/ha/auspice_config.json b/profiles/nextflu-private/vic/ha/auspice_config.json index b9884b3c..ea047f20 100644 --- a/profiles/nextflu-private/vic/ha/auspice_config.json +++ b/profiles/nextflu-private/vic/ha/auspice_config.json @@ -79,6 +79,41 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "C.2", + "#4272CE" + ], + [ + "C.3", + "#58A2AC" + ], + [ + "C.5", + "#7DB877" + ], + [ + "C.5.1", + "#AEBD50" + ], + [ + "C.5.4", + "#D8AE3E" + ], + [ + "C.5.6", + "#E67A32" + ], + [ + "C.5.7", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", diff --git a/workflow/snakemake_rules/core.smk 
b/workflow/snakemake_rules/core.smk index 1efb4e4d..37bec68d 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -420,6 +420,43 @@ rule import_clades: --output {output.node_data} 2>&1 | tee {log} """ +rule download_emerging_subclades: + output: + subclades="config/{lineage}/{segment}/emerging_subclades.tsv", + conda: "../envs/nextstrain.yaml" + params: + url=lambda wildcards: emerging_subclade_url_by_lineage_and_segment.get(wildcards.lineage, {}).get(wildcards.segment), + shell: + """ + curl --fail --location -o {output.subclades} "{params.url}" + """ + +rule emerging_subclades: + input: + tree = build_dir + "/{build_name}/{segment}/tree.nwk", + muts = build_dir + "/{build_name}/{segment}/muts.json", + clades = lambda wildcards: config["builds"][wildcards.build_name].get("emerging_subclades"), + output: + node_data = build_dir + "/{build_name}/{segment}/emerging_subclades.json", + params: + membership_name = "emerging_subclade", + label_name = "Emerging subclade", + conda: "../envs/nextstrain.yaml" + benchmark: + "benchmarks/emerging_subclades_{build_name}_{segment}.txt" + log: + "logs/emerging_subclades_{build_name}_{segment}.txt" + shell: + """ + augur clades \ + --tree {input.tree} \ + --mutations {input.muts} \ + --clades {input.clades} \ + --membership-name {params.membership_name} \ + --label-name {params.label_name:q} \ + --output {output.node_data} 2>&1 | tee {log} + """ + rule annotate_haplotypes: input: tree=build_dir + "/{build_name}/ha/tree.nwk", diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index e5d5391f..b9088382 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -23,6 +23,9 @@ def _get_node_data_by_wildcards(wildcards): if config["builds"][wildcards.build_name].get('subclades', False): inputs.append(rules.subclades.output.node_data) + if config["builds"][wildcards.build_name].get('emerging_subclades', False): + 
inputs.append(rules.emerging_subclades.output.node_data) + if config["builds"][wildcards.build_name].get('enable_titer_models', False) and wildcards.segment == 'ha': for collection in config["builds"][wildcards.build_name]["titer_collections"]: inputs.append(rules.titers_sub.output.titers_model.format(titer_collection=collection["name"], **wildcards_dict)) From 17ea783af922dad3a3130b07728e26e4da0f4785 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 11 Sep 2024 13:46:55 -0700 Subject: [PATCH 2/5] Add emerging subclades to H3N2 HA Nextclade config Adds emerging subclades as another "clade system" for the Nextclade workflow. This change allows us to annotate emerging subclades with the Nextclade dataset produced by this workflow. --- nextclade/config/config_dict.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextclade/config/config_dict.yaml b/nextclade/config/config_dict.yaml index ec1a7087..ecbc9f83 100644 --- a/nextclade/config/config_dict.yaml +++ b/nextclade/config/config_dict.yaml @@ -51,6 +51,9 @@ builds: short-clade: url: "seasonal_A-H3N2_HA/main/.auto-generated/clades.tsv" key: "short-clade" + emerging_subclade: + url: "seasonal_A-H3N2_HA/emerging/.auto-generated/subclades.tsv" + key: "emerging_subclade" refs: EPI1857216: filter: "--min-date 2019 --probabilistic-sampling --group-by year region --min-length 1500 --subsample-max-sequences 2000" From 685634883a1498a6c6653b0fffb603c30f0c03e9 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Fri, 13 Sep 2024 17:01:51 -0700 Subject: [PATCH 3/5] Add emerging subclades for H3N2 HA 2y public build --- config/h3n2/ha/auspice_config.json | 39 ++++++++++++++++++++++++++++++ profiles/nextstrain-public.yaml | 1 + 2 files changed, 40 insertions(+) diff --git a/config/h3n2/ha/auspice_config.json b/config/h3n2/ha/auspice_config.json index 1d7a787a..968694f2 100644 --- a/config/h3n2/ha/auspice_config.json +++ b/config/h3n2/ha/auspice_config.json @@ -75,6 +75,45 @@ ] ] }, + { + "key": 
"emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "J", + "#4068CF" + ], + [ + "J.1", + "#5098B9" + ], + [ + "J.1.1", + "#6CB28C" + ], + [ + "J.2", + "#94BD62" + ], + [ + "J.2.1", + "#BFBB47" + ], + [ + "J.2.2", + "#DFA53B" + ], + [ + "J.3", + "#E67131" + ], + [ + "J.4", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", diff --git a/profiles/nextstrain-public.yaml b/profiles/nextstrain-public.yaml index dd124154..5a4206a4 100644 --- a/profiles/nextstrain-public.yaml +++ b/profiles/nextstrain-public.yaml @@ -205,6 +205,7 @@ array-builds: tree_exclude_sites: "config/{lineage}/{{segment}}/exclude-sites.txt" clades: "config/{lineage}/ha/clades.tsv" subclades: "config/{lineage}/{{segment}}/subclades.tsv" + emerging_subclades: "config/h3n2/{{segment}}/emerging_subclades.tsv" auspice_config: "config/{lineage}/{{segment}}/auspice_config.json" vaccines: "config/{lineage}/vaccine.json" enable_glycosylation: true From f671c89a9daadb0272af41683855723f9a2aa928 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Sat, 14 Sep 2024 08:26:45 -0700 Subject: [PATCH 4/5] Add emerging subclades to forecasts profile --- profiles/nextflu-private-forecasts.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/profiles/nextflu-private-forecasts.yaml b/profiles/nextflu-private-forecasts.yaml index e747d52d..0a8aac30 100644 --- a/profiles/nextflu-private-forecasts.yaml +++ b/profiles/nextflu-private-forecasts.yaml @@ -57,6 +57,7 @@ array-builds: tree_exclude_sites: "config/h3n2/{{segment}}/exclude-sites.txt" clades: "config/h3n2/ha/clades.tsv" subclades: "config/h3n2/ha/subclades.tsv" + emerging_subclades: "config/h3n2/ha/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/h3n2/ha/auspice_config.json" vaccines: "config/h3n2/vaccine.json" enable_titer_models: true From c23d20dbecca5265da878989d1046431825c02f0 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Mon, 16 Sep 2024 11:26:47 -0700 Subject: 
[PATCH 5/5] Set memory used by forecasting tips rule --- workflow/snakemake_rules/fitness.smk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflow/snakemake_rules/fitness.smk b/workflow/snakemake_rules/fitness.smk index 835430df..5ca923ec 100644 --- a/workflow/snakemake_rules/fitness.smk +++ b/workflow/snakemake_rules/fitness.smk @@ -380,6 +380,8 @@ rule forecast_tips: "benchmarks/forecast_tips_{build_name}_{segment}_{model}.txt" log: "logs/forecast_tips_{build_name}_{segment}_{model}.txt" + resources: + mem_mb=8000, shell: """ python3 flu-forecasting/src/forecast_model.py \