From d6de73450b1c04d448e3ef3f3dfa310b57235eb8 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:19:58 -0700 Subject: [PATCH] Use population-based weighted sampling for other regional builds Extend the weighted sampling approach from Asia builds to other regional builds. This comes with the added benefit of reducing redundancy in subsampling schemes. --- .../nextstrain-gisaid-21L/builds.yaml | 338 +++--------------- .../nextstrain-gisaid/builds.yaml | 338 +++--------------- .../nextstrain-open/builds.yaml | 338 +++--------------- 3 files changed, 177 insertions(+), 837 deletions(-) diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index 41363741a..a10ae4c29 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -40,8 +40,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -61,99 +59,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America since pandemic start @@ -170,338 +168,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index ab07dcc63..0030ede1d 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -33,8 +33,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -54,99 +52,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Genomic epidemiology of SARS-CoV-2 with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America since pandemic start @@ -162,338 +160,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index e39f59da7..74d40785a 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -33,8 +33,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -54,99 +52,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Genomic epidemiology of SARS-CoV-2 with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America since pandemic start @@ -162,338 +160,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m