Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
lemieuxl committed Oct 26, 2015
2 parents 096c264 + 0ea2647 commit 64ba7ba
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 39 deletions.
94 changes: 56 additions & 38 deletions pyGenClean/run_data_clean_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,9 @@ def run_duplicated_samples(in_prefix, in_type, out_prefix, base_dir, options):

if len(not_good_still) > 0:
text = latex_template.textbf(
"There {} {:,d} sample{} that {} not good enough for "
"completion, but {} still selected as the best "
"duplicate (see Table~{}).".format(
"There {} {:,d} sample{} that {} not good due to low "
"completion or concordance, but {} still selected as "
"the best duplicate (see Table~{}).".format(
"were" if len(not_good_still) > 1 else "was",
len(not_good_still),
"s" if len(not_good_still) > 1 else "",
Expand Down Expand Up @@ -592,12 +592,35 @@ def run_duplicated_snps(in_prefix, in_type, out_prefix, base_dir, options):
for marker_id in removed_markers:
print >>o_file, marker_id + "\t" + "removed duplicate"

# Reading the markers with problem
problematic_markers = set()
if os.path.isfile(script_prefix + ".problems"):
with open(script_prefix + ".problems", "r") as i_file:
for markers in i_file.read().splitlines()[1:]:
problematic_markers |= set(markers.split("\t")[2].split(";"))
# Writing the summary results
total_remaining = 0
with open(os.path.join(base_dir, "results_summary.txt"), "a") as o_file:
print >>o_file, "# {}".format(script_prefix)
rep_counter = Counter(duplicated_count.values()).most_common()
if rep_counter:
print >>o_file, "Number of replicated markers"
else:
print >>o_file, "Number of replicated markers\t0"
total_nb_removed_rep = 0
for rep_type, rep_count in rep_counter:
nb_removed_rep = (rep_count * rep_type) - rep_count
print >>o_file, " - x{}\t{:,d}\t-{:,d}".format(
rep_type,
rep_count,
nb_removed_rep,
)
total_nb_removed_rep += nb_removed_rep
total_remaining = total_nb_removed_rep - len(removed_markers)
print >>o_file, (
"Number of replicated markers kept\t{nb:,d}\t+{nb:,d}".format(
nb=total_remaining,
)
)
print >>o_file, ("Poorly chosen replicated markers\t"
"{nb:,d}".format(nb=len(not_good_still)))
print >>o_file, ("Final number of excluded markers\t"
"{nb:,d}".format(nb=len(removed_markers)))
print >>o_file, "---"

# We create a LaTeX summary
latex_file = os.path.join(script_prefix + ".summary.tex")
Expand Down Expand Up @@ -641,18 +664,37 @@ def run_duplicated_snps(in_prefix, in_type, out_prefix, base_dir, options):
)
print >>o_file, latex_template.wrap_lines(text)

if total_remaining > 0:
text = latex_template.textbf(
"In total, {:,d} marker{} {} not merged for different "
"reasons (low completion rate, discordant allele, "
"discordant MAF, etc) and {} still present in the "
"dataset.".format(
total_remaining,
"s" if total_remaining > 1 else "",
"were" if total_remaining > 1 else "was",
"are" if total_remaining > 1 else "is",
)
)
print >>o_file, latex_template.wrap_lines(text)

if len(not_good_still) > 0:
start = "A total of"
end = " and {} still present in the final dataset.".format(
"are" if len(not_good_still) > 1 else "is",
)
if total_remaining > 0:
start = "Out of these,"
end = "."
text = latex_template.textbf(
"There {} {:,d} marker{} that {} not good enough for "
start + " {:,d} marker{} {} not good enough for "
"completion, but {} still selected as the best "
"duplicate and {} still present in the final "
"dataset.".format(
"were" if len(not_good_still) > 1 else "was",
"duplicate{}".format(
len(not_good_still),
"s" if len(not_good_still) > 1 else "",
"were" if len(not_good_still) > 1 else "was",
"were" if len(not_good_still) > 1 else "was",
"are" if len(not_good_still) > 1 else "is",
end,
)
)
print >>o_file, latex_template.wrap_lines(text)
Expand All @@ -661,30 +703,6 @@ def run_duplicated_snps(in_prefix, in_type, out_prefix, base_dir, options):
msg = "{}: cannot write LaTeX summary".format(latex_file)
raise ProgramError(msg)

# Writing the summary results
with open(os.path.join(base_dir, "results_summary.txt"), "a") as o_file:
print >>o_file, "# {}".format(script_prefix)
counter = Counter(duplicated_count.values()).most_common()
if counter:
print >>o_file, "Number of replicated markers"
else:
print >>o_file, "Number of replicated markers\t0"
for rep_type, rep_count in counter:
print >>o_file, " - x{}\t{:,d}\t-{:,d}".format(
rep_type,
rep_count,
(rep_count * rep_type) - rep_count,
)
print >>o_file, ("Poorly chosen replicated markers\t"
"{nb:,d}\t+{nb:,d}".format(nb=len(not_good_still)))
print >>o_file, ("Problematic markers not chosen\t"
"{nb:,d}\t+{nb:,d}".format(
nb=len(problematic_markers - chosen_markers),
))
print >>o_file, ("Final number of excluded markers\t"
"{nb:,d}".format(nb=len(removed_markers)))
print >>o_file, "---"

# We know this step does produce a new data set (tfile), so we return it
return (os.path.join(out_prefix, "dup_snps.final"), "tfile", latex_file,
duplicated_snps.desc, duplicated_snps.long_desc, None)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

MAJOR = 1
MINOR = 7
MICRO = 0
MICRO = 1
VERSION = "{0}.{1}.{2}".format(MAJOR, MINOR, MICRO)


Expand Down

0 comments on commit 64ba7ba

Please sign in to comment.