Skip to content

Commit

Permalink
c0.17.11 Formatted
Browse files Browse the repository at this point in the history
  • Loading branch information
AshrithSagar committed Nov 1, 2022
1 parent 681ed23 commit f247e3d
Showing 1 changed file with 22 additions and 38 deletions.
60 changes: 22 additions & 38 deletions grasp/grasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
class BAlaS:
"""BUDE Alanine Scan: ddG values
"""

def __init__(self):
# No data is loaded yet: replot_read() stores its frame in self.df_ddg and
# bals_read() returns self.df_bals, so all frames start out as None.
self.stable_df_bals = self.df_bals = self.df_ddg = None
# Filled in by replot_filter() with the rows that pass the ddG thresholds.
self.ddg_threshold = None
self.positions = []


def bals_read(self, bals2csv_file):
"""Accepts .csv files converted using bals2csv.py
"""
Expand All @@ -33,7 +33,6 @@ def bals_read(self, bals2csv_file):

return self.df_bals


def replot_read(self, replot_csv_file):
"""Accepts .csv files from replot"""
df_ddg = pandas.read_csv(replot_csv_file)
Expand All @@ -43,7 +42,6 @@ def replot_read(self, replot_csv_file):
self.df_ddg = df_ddg
return df_ddg


def replot_filter(self, mutation_lock=None, lower_threshold=0, upper_threshold=1):
"""Filter out based on ddG thresholds"""
# Remove mutation_lock positions.
Expand All @@ -53,14 +51,13 @@ def replot_filter(self, mutation_lock=None, lower_threshold=0, upper_threshold=1

df_lower = df_unlocked[df_unlocked['ddGs'] > lower_threshold]
ddg_threshold = df_lower[df_lower['ddGs'] < upper_threshold]
print("DDG values for between", lower_threshold,
"and", upper_threshold, "kCal/mol:\n",
ddg_threshold.sort_values('ddGs', ascending=True))
print("DDG values for between", lower_threshold,
"and", upper_threshold, "kCal/mol:\n",
ddg_threshold.sort_values('ddGs', ascending=True))

self.ddg_threshold = ddg_threshold
return ddg_threshold


def replot_get_positions(self, count=5):
"""Get positions from filtered ddG thresholds"""
ddg_threshold_sorted = self.ddg_threshold.sort_values('ddGs', ascending=True)
Expand All @@ -81,12 +78,12 @@ def replot_get_positions(self, count=5):
class MutationObject:
"""Mutation format
"""

def __init__(self, sequence, mutation):
# Defaults come first: from_str() keeps the existing mutant_type when its
# pattern does not match (the other fields presumably behave the same way).
self.wild_type = self.position = self.mutant_type = None
self.sequence = sequence
# Parse the mutation string into the fields above.
self.from_str(mutation)


def from_str(self, mutation):
"""Convert mutation in string format to MutationObject.
"""
Expand All @@ -101,15 +98,13 @@ def from_str(self, mutation):
mutant_type = re.search(r'([A-Za-z])$', mutation)
self.mutant_type = mutant_type[0] if mutant_type is not None else self.mutant_type


def new_mutant_type(self, mut_ty):
"""Create a new MutationObject with a different mutant_type attribute.

Returns a shallow copy (copy.copy) of this object with mutant_type set to
mut_ty. The attribute is assigned on the copy, not on self.
"""
new_obj = copy.copy(self)
new_obj.mutant_type = mut_ty
return new_obj


def to_str(self):
"""Convert MutationObject to string format.
"""
Expand All @@ -121,6 +116,7 @@ def to_str(self):
class Mutater:
"""Mutater: groups, ddg, dipeptide
"""

def __init__(self, sequence, mutations=None, mutation_lock=None):
self.groups = {
'polar_uncharged': ['S', 'T', 'C', 'N', 'Q'],
Expand All @@ -135,13 +131,11 @@ def __init__(self, sequence, mutations=None, mutation_lock=None):
self.sequential_mutations = self.sequences_consumable = None
self.sequences = None


def append_mutation(self, mut_obj):
"""Append input mutation object to self.mutations

The list is modified in place; nothing is returned.
"""
self.mutations.append(mut_obj)


def by_groups(self):
"""Returns mutation arrays based on group mutations
"""
Expand Down Expand Up @@ -171,30 +165,27 @@ def by_groups(self):
self.sequential_mutations = sequential_mutations
return sequential_mutations


def to_sequences(self):
    """Expand self.sequential_mutations into candidate sequences.

    P&C of new_mutations, implemented using the Cartesian product: each
    element of self.sequential_mutations is treated as one pool of choices
    and every result is a tuple holding one pick from each pool.

    The product is materialised into a list when possible
    (self.sequences_consumable = False). If building the list runs out of
    memory, a lazy itertools.product iterator is stored instead and
    self.sequences_consumable is set to True to flag that the result can
    only be iterated once.

    Returns:
        The list (or one-shot iterator) that is also stored in
        self.sequences.
    """
    try:
        seqs = list(itertools.product(*self.sequential_mutations))
        self.sequences_consumable = False
    except MemoryError:
        # Only exhaustion of memory justifies the lazy fallback; any other
        # error (e.g. sequential_mutations is None) should surface as-is.
        seqs = itertools.product(*self.sequential_mutations)
        self.sequences_consumable = True
    print("S| Converted mutations to sequences format")

    self.sequences = seqs
    return seqs


def show_sequences(self):
"""Print self.sequences, one sequence per line.

Each entry is joined with "" before printing, so entries are expected to be
iterables of strings (such as the tuples produced by to_sequences()).
NOTE(review): when self.sequences is a one-shot iterator
(self.sequences_consumable is True) this loop consumes it.
"""
for sequence in self.sequences:
sequence = "".join(sequence)
print(sequence)


def save_sequences(self, file):
"""Save self.sequences to a file"""
with open(file, "w", encoding='utf8') as opened_file:
Expand All @@ -203,29 +194,26 @@ def save_sequences(self, file):
opened_file.write(f"{line}\n")
print("S| Saved sequences to", file)


def remove_dipeptides(self):
    """Remove dipeptides from self.sequences.

    Every entry of self.sequences is joined into a string, and any string
    containing the same character twice in a row is dropped.

    The survivors are materialised into a list when possible
    (self.sequences_consumable = False). If that runs out of memory, a lazy
    filter iterator is stored instead and self.sequences_consumable is set
    to True to flag that the result can only be iterated once.
    NOTE(review): if self.sequences was itself a one-shot iterator, the
    entries consumed before the MemoryError are not revisited by the
    fallback.

    Returns:
        The filtered list (or one-shot iterator) that is also stored in
        self.sequences.
    """
    def has_dipeptide(seq):
        # A back-reference to group 1 matches any character repeated
        # immediately after itself.
        return re.search(r"(.)\1", seq)

    try:
        seqs = list(itertools.filterfalse(has_dipeptide, map("".join, self.sequences)))
        self.sequences_consumable = False
    except MemoryError:
        seqs = itertools.filterfalse(has_dipeptide, map("".join, self.sequences))
        self.sequences_consumable = True
    print("S| Removed dipeptides from sequences")

    self.sequences = seqs
    # The original returned the undefined name `sequences` (NameError).
    return seqs


def by_intein_sequences(self):
"""[SKIPPED]

Placeholder: the body is only this docstring, so calling it does nothing
and returns None.
"""


def by_cleavage_sites(self):
"""[SKIPPED]
"""
Expand All @@ -238,12 +226,10 @@ def by_cleavage_sites(self):
# match = str(sequence).find(intein)
# return not match


def by_charge_criterion(self):
"""[SKIPPED]

Placeholder: the body is only this docstring, so calling it does nothing
and returns None.
"""


def randomise(self):
"""At random positions
"""
Expand All @@ -258,7 +244,6 @@ def randomise(self):
# mutated_positions = new_mutation_positions
# sequences = groups_mutations(sequence, mutation_positions)


def random_sampler(self, choose=5):
"""Random sampling through random.
Chooses 5 sequences by default.
Expand All @@ -270,14 +255,12 @@ def random_sampler(self, choose=5):
try:
seqs = random.sample(seqs, choose)
print("S| Sampled", choose, "sequences")
except: # pylint: disable=bare-except
except: # pylint: disable=bare-except
pass


self.sequences = seqs
return sequences


def monte_carlo_sampler(self, choose=5):
"""Random sampling through Monte-Carlo.
A normal distribution is chosen.
Expand All @@ -298,7 +281,8 @@ def check_dipeptide(self, sequence):
"""Returns first dipeptide match if present, else None
"""
match = re.search(r"(.)\1", str(sequence))
if match: return match[0], match.span()
if match:
return match[0], match.span()
return None, None

def dipeptide_matches(self, sequences):
Expand Down Expand Up @@ -330,7 +314,7 @@ def dipeptide_mutater(self, sequence, dipeptide, ddg):

# Conservative replacement.
contents = [sequence]
contents.append(str(mutation_position)) # Just one mutation_position.
contents.append(str(mutation_position)) # Just one mutation_position.
print(contents)
sequence, mutations = format_input(contents)

Expand All @@ -345,12 +329,12 @@ def dipeptide_mutater(self, sequence, dipeptide, ddg):
if mutation['wild_type'] in groups[types]:
for AA in groups[types]:
position = int(mutation['position'])
if (AA == sequence[position-2])|(AA == sequence[position]):
if (AA == sequence[position-2]) | (AA == sequence[position]):
print("Discarding mutation of", mutation['wild_type'],
"with", AA, "at", position)
"with", AA, "at", position)
else:
print("DP: Mutating", mutation['wild_type'],
"with", AA, "at", position)
"with", AA, "at", position)
new_mutation = mutation['wild_type'] + mutation['position'] + AA
new_mutations.append(new_mutation)
break
Expand All @@ -365,7 +349,7 @@ def save_as(file, contents):

def to_mut_obj(sequence, given_mutations):
"""Convert to MutationObject"""
remove_new_line = lambda s: str(s).replace('\n', '')
def remove_new_line(s): return str(s).replace('\n', '')

muts = []
for line in given_mutations:
Expand All @@ -377,7 +361,7 @@ def to_mut_obj(sequence, given_mutations):


def format_input(contents):
remove_new_line = lambda s: str(s).replace('\n', '')
def remove_new_line(s): return str(s).replace('\n', '')

sequence = remove_new_line(contents[0])
given_mutations = contents[1:]
Expand All @@ -396,7 +380,8 @@ def main():
parser.add_argument('-g', '--groups', action='store_true', help='Groups filter')
parser.add_argument('-a', '--alaninescan', help='Alanine scan DDG results from BUDE Alanine scan')
parser.add_argument('-l', '--lock', type=str, dest='mutation_lock', help='Mutation lock positions')
parser.add_argument('-c', '--count', type=int, dest='mutation_count', help='Number of mutations to consider at a time', default=1)
parser.add_argument('-c', '--count', type=int, dest='mutation_count',
help='Number of mutations to consider at a time', default=1)
args = parser.parse_args()

# If --output not specified, use input_file filename.
Expand All @@ -413,7 +398,7 @@ def main():
lock_contents = file.readlines()
mutation_lock = []
for line in lock_contents:
content = line.replace('\n', '') # Remove newlines.
content = line.replace('\n', '') # Remove newlines.
mutation_lock.append(content)
mutations_obj.mutation_lock = mutation_lock.sort(key=lambda x: int(x))
print("Locked mutation positions:", mutation_lock)
Expand All @@ -423,7 +408,6 @@ def main():
seqs = mutations_obj.to_sequences()
mutations_obj.save_sequences(output_file.replace(".txt", "_GrpAllSeqs.txt"))


if args.alaninescan:
bude = BAlaS()
df_ddg = bude.replot_read(args.alaninescan)
Expand Down

0 comments on commit f247e3d

Please sign in to comment.