diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index 41363741a..a10ae4c29 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -40,8 +40,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -61,99 +59,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Evolution SARS-CoV-2 relative to clade 21L reference virus with subsampling focused on South America since pandemic start @@ -170,338 +168,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index ab07dcc63..0030ede1d 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -33,8 +33,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -54,99 +52,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Genomic epidemiology of SARS-CoV-2 with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America since pandemic start @@ -162,338 +160,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index e39f59da7..74d40785a 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -33,8 +33,6 @@ inputs: # For each build we specify a subsampling scheme via an explicit key. # These subsampling schemes are defined at the bottom of this file. # (They override the defaults) -# North America and Oceania are subsampled at the "division" level -# Africa, Asia, Europe and South America are subsampled at the "country" level # # Auspice config is specified in rule auspice_config in export_for_nextstrain.smk builds: @@ -54,99 +52,99 @@ builds: subsampling_scheme: nextstrain_global_all_time title: Genomic epidemiology of SARS-CoV-2 with subsampling focused globally since pandemic start africa_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past month africa_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 2 months africa_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa over the past 6 months africa_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Africa title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Africa since pandemic start asia_1m: - subsampling_scheme: nextstrain_region_asia_1m + subsampling_scheme: nextstrain_region_1m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past month asia_2m: - subsampling_scheme: nextstrain_region_asia_2m + subsampling_scheme: nextstrain_region_2m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 2 months asia_6m: - subsampling_scheme: nextstrain_region_asia_6m + subsampling_scheme: nextstrain_region_6m region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia over the past 6 months asia_all-time: - subsampling_scheme: nextstrain_region_asia_all_time + subsampling_scheme: nextstrain_region_all_time region: Asia title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Asia since pandemic start europe_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past month europe_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 2 months europe_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe over the past 6 months europe_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: Europe title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Europe since pandemic start north-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past month north-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 2 months north-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America over the past 6 months north-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: North America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on North America since pandemic start oceania_1m: - subsampling_scheme: nextstrain_region_grouped_by_division_1m + subsampling_scheme: nextstrain_region_1m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past month oceania_2m: - subsampling_scheme: nextstrain_region_grouped_by_division_2m + subsampling_scheme: nextstrain_region_2m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 2 months oceania_6m: - subsampling_scheme: nextstrain_region_grouped_by_division_6m + subsampling_scheme: nextstrain_region_6m region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania over the past 6 months oceania_all-time: - subsampling_scheme: nextstrain_region_grouped_by_division_all_time + subsampling_scheme: nextstrain_region_all_time region: Oceania title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on Oceania since pandemic start south-america_1m: - subsampling_scheme: nextstrain_region_grouped_by_country_1m + subsampling_scheme: nextstrain_region_1m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past month south-america_2m: - subsampling_scheme: nextstrain_region_grouped_by_country_2m + subsampling_scheme: nextstrain_region_2m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 2 months south-america_6m: - subsampling_scheme: nextstrain_region_grouped_by_country_6m + subsampling_scheme: nextstrain_region_6m region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America over the past 6 months south-america_all-time: - subsampling_scheme: nextstrain_region_grouped_by_country_all_time + subsampling_scheme: nextstrain_region_all_time region: South America title: Genomic epidemiology of SARS-CoV-2 with subsampling focused on South America since pandemic start @@ -162,338 +160,120 @@ subsampling: group_by: "Nextstrain_clade" max_sequences: 300 - # Custom subsampling logic for regions over 1m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_1m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_2m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_6m: - # Early focal samples for region - focal_early: - group_by: "division year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "division year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by division for North America and Oceania - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_division_all_time: - # Focal samples for region - focal: - group_by: "division year month" - max_sequences: 3200 - exclude: "--exclude-where 'region!={region}'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 800 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for region Asia over 1m + # Custom subsampling logic for a region over 1m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_1m: - # Early focal samples for Asia - asia_early: + nextstrain_region_1m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 1M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 1M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 1M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 2m + # Custom subsampling logic for a region over 2m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_2m: - # Early focal samples for Asia - asia_early: + nextstrain_region_2m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 2M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country week" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 2M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country week" max_sequences: 700 min_date: "--min-date 2M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over 6m + # Custom subsampling logic for a region over 6m # Grouping by country weighted by population size # 4375 total # 4:1 ratio of recent to early # 4:1 ratio of focal to context - nextstrain_region_asia_6m: - # Early focal samples for Asia - asia_early: + nextstrain_region_6m: + # Early focal samples for region + region_early: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 700 max_date: "--max-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_early: group_by: "country year month" max_sequences: 175 max_date: "--max-date 6M" - exclude: "--exclude-where 'region=Asia'" - # Recent focal samples for Asia - asia_recent: + exclude: "--exclude-where 'region={region}'" + # Recent focal samples for region + region_recent: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 2800 min_date: "--min-date 6M" - exclude: "--exclude-where 'region!=Asia'" + exclude: "--exclude-where 'region!={region}'" # Early contextual samples from the rest of the world context_recent: group_by: "country year month" max_sequences: 700 min_date: "--min-date 6M" - exclude: "--exclude-where 'region=Asia'" + exclude: "--exclude-where 'region={region}'" - # Custom subsampling logic for region Asia over all-time + # Custom subsampling logic for a region over all-time # Grouping by country weighted by population size # 4375 total # 4:1 ratio of focal to context - nextstrain_region_asia_all_time: - # Focal samples for Asia - asia: + nextstrain_region_all_time: + # Focal samples for region + region: group_by: "country year month" group_by_weights: "defaults/population_weights.tsv" max_sequences: 3500 - exclude: "--exclude-where 'region!=Asia'" - # Contextual samples from the rest of the world - context: - group_by: "country year month" - max_sequences: 875 - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 1m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_1m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 1M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 1M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 2m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_2m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 2M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country week" - max_sequences: 2560 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country week" - max_sequences: 640 - min_date: "--min-date 2M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over 6m - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of recent to early - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_6m: - # Early focal samples for region - focal_early: - group_by: "country year month" - max_sequences: 640 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_early: - group_by: "country year month" - max_sequences: 160 - max_date: "--max-date 6M" - exclude: "--exclude-where 'region={region}'" - # Recent focal samples for region - focal_recent: - group_by: "country year month" - max_sequences: 2560 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region!={region}'" - # Early contextual samples from the rest of the world - context_recent: - group_by: "country year month" - max_sequences: 640 - min_date: "--min-date 6M" - exclude: "--exclude-where 'region={region}'" - - # Custom subsampling logic for regions over all-time - # Grouping by country for Africa, Asia, Europe and South America - # 4000 total - # 4:1 ratio of focal to context - nextstrain_region_grouped_by_country_all_time: - # Focal samples for region - focal: - group_by: "country year month" - max_sequences: 3200 exclude: "--exclude-where 'region!={region}'" # Contextual samples from the rest of the world context: group_by: "country year month" - max_sequences: 800 + max_sequences: 875 exclude: "--exclude-where 'region={region}'" # Custom subsampling logic for global region over 1m