forked from joyceyiyiwang/Portability_Questions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
04b_convert_plink2_glm_to_plink1.py
48 lines (42 loc) · 1.56 KB
/
04b_convert_plink2_glm_to_plink1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import argparse
import pandas as pd
def convert_dataframe(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Rename Plink 2 ``--glm`` columns to their Plink 1 names and subset them.

    Handles both the linear-regression header (``T_STAT``, ``BETA``, ``SE``)
    and the logistic-regression header (``Z_STAT``, ``OR``, ``LOG(OR)_SE``).
    Columns that are absent from the input are simply skipped, so the same
    function works for either flavour of output.

    Args:
        dataframe: Table read from a Plink 2 association result file.

    Returns:
        A new DataFrame containing only the Plink 1 style columns that are
        present, ordered as listed in ``plink1_columns`` below.
    """
    plink2_to_plink1 = {
        '#CHROM': '#CHR',
        'POS': 'BP',
        'ID': 'SNP',
        'OBS_CT': 'NMISS',
        'T_STAT': 'STAT',
        # Logistic-regression output uses these names instead of
        # T_STAT / SE; without them the statistic and its standard error
        # would be silently dropped by the filter below.
        'Z_STAT': 'STAT',
        'LOG(OR)_SE': 'SE',
    }
    # 'OR' only appears in logistic output; for linear output it is absent
    # and the resulting column order is unchanged.
    plink1_columns = ['#CHR', 'SNP', 'BP', 'A1', 'TEST', 'NMISS', 'BETA',
                      'OR', 'STAT', 'P', 'SE']
    return (
        dataframe
        .rename(columns=plink2_to_plink1)
        .filter(items=plink1_columns)
    )
if __name__ == "__main__":
    # Only the script entry point needs sys (for the stdout fallback below),
    # so it is imported here rather than at module level.
    import sys

    parser = argparse.ArgumentParser(description=(
        'Convert a Plink 2 association output (.glm.linear) format to '
        'Plink 1 association output format (.assoc)'))
    parser.add_argument('path', help=('path to a Plink 2 GWAS output file '
                                      '(.glm.linear or .glm.logistic) '
                                      'to be converted'), nargs='+')
    parser.add_argument('-o', '--output',
                        help=('path to the Plink 1 output file to be created '
                              '(default: standard output)'))
    args = parser.parse_args()

    # Convert every input file, then stack them in one concat call. One or
    # many paths are handled the same way (nargs='+' guarantees at least
    # one), and this avoids growing a frame from an empty DataFrame, which
    # re-copies the accumulated rows on each iteration and relies on
    # pandas' deprecated treatment of empty frames in concat.
    converted_tables = [
        pd.read_csv(path, sep='\t').pipe(convert_dataframe)
        for path in args.path
    ]
    complete_df = pd.concat(converted_tables)

    # Without -o, to_csv(None) would just return a string that gets
    # discarded; write to stdout instead so the result is never lost.
    destination = args.output if args.output is not None else sys.stdout
    (
        complete_df
        # Keep only the first row seen for each variant ID.
        .drop_duplicates(subset=['SNP'])
        .to_csv(destination, index=False, sep='\t', float_format='%.9g')
    )