-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMAG_extract_checkM_results.py
46 lines (32 loc) · 1.1 KB
/
MAG_extract_checkM_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
import ast
import sys
import jak_utils
import pandas as pd
import argparse
# Columns written to the output table, in this order.  "genome" is the bin
# name taken from the first field of each input line; every other entry is a
# key looked up in that line's statistics dict.
COLUMNS = ["genome", "Completeness", "Contamination", "Genome size", "# scaffolds",
           "Longest scaffold", "N50 (scaffolds)", "marker lineage", "GC",
           "Coding density", "# markers"]


def build_parser():
    """Return the command-line parser (``-c/--checkm`` and ``-o/--out``)."""
    parser = argparse.ArgumentParser(
        description='Extract selected columns from a CheckM bin_stats_ext.tsv '
                    'file into a tab-separated table')
    parser.add_argument('-c',
                        '--checkm',
                        help="Path to bin_stats_ext.tsv file",
                        required=True)
    parser.add_argument('-o',
                        '--out',
                        help="Path to write output to",
                        required=True)
    return parser


def parse_bin_stats(lines):
    """Build the results table from the lines of a bin_stats_ext.tsv file.

    Each non-blank line must have the form ``<genome>\\t<dict literal>``,
    where the second field is a Python dict literal readable by
    ``ast.literal_eval``.

    Args:
        lines: Iterable of text lines (an open file handle works).

    Returns:
        A ``pandas.DataFrame`` with exactly the columns in ``COLUMNS``, one
        row per non-blank input line.  Keys missing from a line's dict are
        left as NaN; keys not listed in ``COLUMNS`` are dropped.

    Raises:
        ValueError: If a non-blank line has no tab separator, or if the
            second field is not a valid Python literal.
        SyntaxError: If the second field cannot be parsed at all.
    """
    records = []
    for line_number, raw_line in enumerate(lines, start=1):
        line = raw_line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on tuple unpacking.
            continue
        genome, separator, payload = line.partition("\t")
        if not separator:
            raise ValueError(
                f"line {line_number}: expected '<genome>\\t<dict>', got {line!r}")
        stats = ast.literal_eval(payload)
        stats['genome'] = genome
        records.append(stats)
    # Build the frame once rather than concatenating per row (which copies the
    # whole frame each time).  dtype=object keeps every value exactly as
    # parsed, so integers are not turned into floats when a column also
    # contains missing entries.
    return pd.DataFrame(records, columns=COLUMNS, dtype=object)


def main():
    """Parse arguments, read the CheckM file and write the filtered table."""
    args = build_parser().parse_args()
    jak_utils.header()
    # The context manager guarantees the input file is closed.
    with open(args.checkm) as handle:
        table = parse_bin_stats(handle)
    table.to_csv(args.out, sep="\t", index=False)


if __name__ == "__main__":
    main()