-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.yaml
130 lines (112 loc) · 5.66 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#================================================================
# Basic Settings
#================================================================
# String value. Name of this run; it is used in output paths,
# e.g., output/EXPERIMENT_NAME/EXPERIMENT_NAME.csv
experiment_name: 'ALLEGRO_EXAMPLE_RUN'
# ---
# ---------------------------------------------------------------
# Path Settings - Ignored if Easy Mode has a value
# ---------------------------------------------------------------
# These three settings are ignored if the Easy Mode setting
# input_csv_path_with_guides (defined further below) has a value.
# input_directory: presumably the directory holding the input fasta files - TODO confirm.
# input_species_path: presumably the csv file listing the input species - TODO confirm.
# input_species_path_column: presumably the column of that csv holding each
# species' file name - TODO confirm.
input_directory: 'data/input/example_input'
input_species_path: 'data/input/fifty_example_input_species.csv'
input_species_path_column: 'ortho_file_name'
# ---------------------------------------------------------------
# String value, default: 'track_e'
# Possible values: 'track_a' or 'track_e'.
# track_a: any of the fasta records can be targeted; each species needs to be
#          targeted at least `multiplicity` times anywhere.
# track_e: each fasta record has to be targeted at least `multiplicity` times.
track: 'track_e'
# ---
# Integer value, default: 1
# Minimum number of times a target must be hit; its meaning depends on `track`:
# In track_a, each species needs to be targeted at least this many times ANYWHERE.
# In track_e, EACH gene/record needs to be targeted at least this many times
# within that gene/record.
multiplicity: 1
# ---
# Possible values: True or False. Default: True
# If True, guides with GC content above gc_max or below gc_min are removed
# from consideration.
# NOTE(review): gc_max/gc_min look like fractions in [0, 1] - confirm.
filter_by_gc: False
gc_max: 0.7 # Only used when filter_by_gc is True.
gc_min: 0.3 # Only used when filter_by_gc is True.
# ---
# ---------------------------------------------------------------
# Easy Mode - Has priority over Path Settings
# ---------------------------------------------------------------
# String value. When this has a value, the Path Settings
# (input_directory, input_species_path, input_species_path_column) are ignored.
# Leave as '' to use the Path Settings instead.
# NOTE(review): presumably a path to a csv file that already contains guides -
# confirm the expected format.
input_csv_path_with_guides: '' # Default: ''
#================================================================
# Advanced Settings
#================================================================
# String value. Possible choices: 'dummy' (default), 'ucrispr'
# 'ucrispr' uses a faster implementation of zhang2019.
# 'dummy' assigns a score of 1.0 to all gRNAs, i.e., treats all guides as equal.
scorer: 'dummy'
# ---
# Integer value, default: 0
beta: 0 # The final size of the gRNA set must be <= this value. Think of it as your budget.
# Setting beta to 0 disables it and causes ALLEGRO to find the smallest gRNA set,
# IGNORING scores (all gRNAs are treated as equals).
# If set to the number of input species, the final size of the set may be up to
# the number of species you have (worst case, one gRNA per species).
# If set to a number HIGHER than the number of species, ALLEGRO finds the best
# `beta` gRNAs.
# ---
# List of strings, Default: ['']
# ALLEGRO will only output guides that do not contain any of the patterns in this list.
# Supports up to 5 chained IUPAC codes; e.g., 'RYSN'.
# The exception to the limit of 5 is when positional nucleotides are used
# in conjunction with 'N's. E.g., NNNNNNNCNNNNGNNNN will exclude guides
# with C and G in those positions.
# Supports individual nucleotides; e.g., 'TTTT' excludes guides with quad-T in their sequence.
# Be careful not to place common nucleotides or short IUPAC codes here, such as
# just 'A' or 'AG': you may end up excluding most or all guides from the calculation.
# Reference:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3568203/#:~:text=For%20vertebrates%2C%20as,of%20these%20species.
patterns_to_exclude: ['TTTT']
# ---
# Boolean: True or False. Default: False
# Significantly affects running time.
# If True, ALLEGRO reports gRNAs with off-targets.
# report_up_to_n_mismatches: report gRNAs with <= N mismatches after the seed region.
output_offtargets: False
report_up_to_n_mismatches: 3 # This may be [0-3]
# input_species_offtarget_dir: presumably the directory containing the
# background fasta files - TODO confirm.
# input_species_offtarget_column: the column in the input csv file with the
# name of the background fasta to check off-targets against.
input_species_offtarget_dir: 'data/input/example_input/'
input_species_offtarget_column: 'ortho_file_name'
# ---
# Boolean: True or False. Default: False
# Affects running time performance.
# Allows a guide that is within the set number of mismatches (after the seed
# region) of another guide to "inherit" the second guide's targets, essentially
# rendering the second guide useless and reducing the total guides needed.
# Works best when unscored guides are present, as it does not consider scores.
# Uses seed_region_is_n_upstream_of_pam and mismatches_allowed_after_seed_region.
preclustering: False
# Boolean: True or False. Default: False
# Affects running time performance.
# Compresses the output gRNA set by clustering similar gRNAs.
# Adds a new column called 'cluster' to output/EXPERIMENT_NAME/EXPERIMENT_NAME.csv
# Uses seed_region_is_n_upstream_of_pam and mismatches_allowed_after_seed_region.
postclustering: False
# The two settings below are shared by preclustering and postclustering.
# NOTE(review): seed_region_is_n_upstream_of_pam presumably gives the length of
# the seed region, counted upstream of the PAM - confirm.
seed_region_is_n_upstream_of_pam: 12
mismatches_allowed_after_seed_region: 3 # Integer value, default: 2
# Integer value, measured in seconds, default: 60
# Only used when solving the ILP, i.e., if there are remaining feasible guides
# with fractional values after solving the LP.
# ALLEGRO stops searching for an optimal solution after this many seconds.
early_stopping_patience: 60
# ---
# Integer value. Default: 3
# Controls a preprocessing step that removes redundant guides.
# Use it if you need to save memory: a higher value sacrifices more running
# time for lower memory consumption.
# Max value is the total number of genes if using track E, and
# the total number of species if using track A.
# NOTE(review): presumably 0 disables this preprocessing step - confirm.
mp_threshold: 0
# ---
# Boolean: True or False. Default: True
# When a problem is deemed unsolvable (e.g., Status: MPSOLVER_INFEASIBLE),
# enabling diagnostics makes ALLEGRO relax each constraint in turn and attempt
# to re-solve the problem.
# If the new problem with the relaxed constraint is solvable, ALLEGRO outputs
# the culprit gene/species.
# Currently, to stop this process, you need to find the PID of the python
# process running ALLEGRO (e.g., using: $ top) and kill it manually:
#   $ kill -SIGKILL PID
enable_solver_diagnostics: True
# ---