-
Notifications
You must be signed in to change notification settings - Fork 0
/
variant_qc.sh
72 lines (57 loc) · 2.79 KB
/
variant_qc.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
# Base directories for the inputs read by this script. Both are empty here, so
# every "${...}/path" below resolves from "/" until they are filled in.
ukbb_dir=""
ukb2_dir=""

# get "all" filters (PTV paper)
# Decompress the variant filter table and write field 5 of every tab-separated
# row whose field 30 is non-zero to not_all_filters.txt. The plink2 calls
# further down pass that file to --exclude.
# NOTE(review): presumably field 5 is the variant ID and field 30 the combined
# "all filters" flag - confirm against the table header.
zcat "${ukbb_dir}/variant_filtering/variant_filter_table.tsv.gz" \
  | awk -F'\t' '($30 != 0){print $5}' > not_all_filters.txt
# 1a. array-specific LD pruning and missingness (Axiom)
# Runs plink2 on the cal pgen fileset with sample filters (--keep, --keep-fam),
# variant filters (--exclude, --extract), MAF and missingness thresholds and
# pairwise LD pruning; output goes to the ../reference/temp_axiom prefix.
# The arguments live in an array so each path stays one word even when
# ukb2_dir contains whitespace or glob characters.
axiom_args=(
  --pfile "${ukb2_dir}/cal/pgen/ukb24983_cal_cALL_v2"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  # keep-fam is equivalent to keep, and is used because there is no
  # --keep-intersect: together the two flags keep only samples in both files.
  --keep-fam "${ukb2_dir}/sqc/axiom_individuals.txt"
  --exclude not_all_filters.txt
  --extract "${ukb2_dir}/sqc/axiom_specific_variants.txt"
  --maf 1e-4
  --geno 0.05
  --indep-pairwise 50 5 0.5
  --out ../reference/temp_axiom
)
plink2 "${axiom_args[@]}"
# 1b. array-specific LD pruning and missingness (BiLEVE)
# Same plink2 invocation shape as the Axiom step, but with the BiLEVE sample
# and variant lists; output goes to the ../reference/temp_bileve prefix.
# Fix: the original had no space between --extract and its file argument, so
# plink2 received a single unrecognised token instead of a flag plus a path.
# The arguments live in an array so each path stays one word even when
# ukb2_dir contains whitespace or glob characters.
bileve_args=(
  --pfile "${ukb2_dir}/cal/pgen/ukb24983_cal_cALL_v2"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  # --keep-fam serves as a second sample filter alongside --keep.
  --keep-fam "${ukb2_dir}/sqc/bileve_individuals.txt"
  --exclude not_all_filters.txt
  --extract "${ukb2_dir}/sqc/bileve_specific_variants.txt"
  --maf 1e-4
  --geno 0.05
  --indep-pairwise 50 5 0.5
  --out ../reference/temp_bileve
)
plink2 "${bileve_args[@]}"
# 1c. array-specific LD pruning and missingness (CNV)
# Runs plink2 on the cnv pgen fileset with the white-British sample filter,
# MAF and missingness thresholds and pairwise LD pruning; output goes to the
# ../reference/temp_cnv prefix.
# Fix: the --keep path now includes sqc/, matching every other step of this
# script that reads ukb24983_white_british.phe; the original omitted it here.
# NOTE(review): confirm that no separate ${ukb2_dir}/population_stratification
# copy of the file was intended.
cnv_args=(
  --pfile "${ukb2_dir}/cnv/pgen/cnv"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  --maf 1e-4
  --geno 0.05
  --indep-pairwise 50 5 0.5
  --out ../reference/temp_cnv
)
plink2 "${cnv_args[@]}"
# 1d. array-specific LD pruning and missingness (HLA)
# Runs plink2 on the hla pgen fileset with the white-British sample filter,
# MAF and missingness thresholds and pairwise LD pruning; output goes to the
# ../reference/temp_hla prefix.
# The arguments live in an array so each path stays one word even when
# ukb2_dir contains whitespace or glob characters.
hla_args=(
  --pfile "${ukb2_dir}/hla/pgen/ukb_hla_v3"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  --maf 1e-4
  --geno 0.05
  --indep-pairwise 50 5 0.5
  --out ../reference/temp_hla
)
plink2 "${hla_args[@]}"
# combine
# Concatenate every ../reference/temp_*.prune.in file produced by the pruning
# steps above into one list, ../reference/array_specific_filters.txt.
prune_in_lists=(../reference/temp_*.prune.in)
cat "${prune_in_lists[@]}" > ../reference/array_specific_filters.txt
# 2. missingness and AF for variants on both arrays
# Runs plink2 on the cal pgen fileset restricted to both_array_variants.txt,
# applies the MAF and missingness thresholds, and writes the surviving variant
# IDs via --write-snplist under the ../reference/shared_filter prefix
# (no LD pruning in this step).
# The arguments live in an array so each path stays one word even when
# ukb2_dir contains whitespace or glob characters.
shared_args=(
  --pfile "${ukb2_dir}/cal/pgen/ukb24983_cal_cALL_v2"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  --exclude not_all_filters.txt
  --extract "${ukb2_dir}/sqc/both_array_variants.txt"
  --maf 1e-4
  --geno 0.05
  --write-snplist
  --out ../reference/shared_filter
)
plink2 "${shared_args[@]}"
# 3. Combine array/cnv/hla-specific filters, and missingness from shared variants
# then add a more aggressive MAF cutoff and re-do LD pruning
# NOTE(review): --maf below is 1e-4, the same value used by the earlier steps,
# so no stricter cutoff is actually applied here - confirm the intended value.
# The arguments live in an array so each path stays one word even when
# ukb2_dir contains whitespace or glob characters.
final_args=(
  --pfile "${ukb2_dir}/array_combined/pgen/ukb24983_cal_hla_cnv"
  --keep "${ukb2_dir}/sqc/population_stratification/ukb24983_white_british.phe"
  --exclude not_all_filters.txt
  # Two variant lists are given to a single --extract: the array-specific
  # prune.in list and the shared-variant snplist.
  # NOTE(review): plink2 is expected to keep variants named in either file -
  # confirm against the plink2 documentation.
  --extract ../reference/array_specific_filters.txt ../reference/shared_filter.snplist
  --indep-pairwise 50 5 0.5
  --maf 1e-4
  --memory 30000
  --out ../reference/variant_qc_v2
)
plink2 "${final_args[@]}"
# clean up intermediates
# Delete everything under ../reference whose name starts with temp_ (the
# outputs written with the temp_* prefixes above).
temp_outputs=(../reference/temp_*)
rm "${temp_outputs[@]}"