## Run EvoDiff in Docker Locally

```bash
docker run -v .:/workspace/evodiff/PD1 --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --name evodiff --rm -it evodiff /bin/bash
```

Once inside the container, change into the mounted working directory:

```bash
cd PD1/
```
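If the `evodiff` image referenced above does not exist locally yet, it must be built first. A minimal sketch, assuming a Dockerfile at the repository root (the tag must match the image name used in `docker run`):

```bash
# Build the EvoDiff image (Dockerfile location and tag are assumptions)
docker build -t evodiff .
```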

## Evolutionary Guided Sequence Generation

Load the pretrained order-agnostic autoregressive diffusion model for MSAs, then generate a new query sequence conditioned on an existing alignment.

```python
from evodiff.pretrained import MSA_OA_DM_MAXSUB
from evodiff.generate_msa import generate_query_oadm_msa_simple
import re
import torch

# Create new tensors on the GPU by default
torch.set_default_device('cuda:0')

# Load the pretrained order-agnostic autoregressive diffusion MSA model
checkpoint = MSA_OA_DM_MAXSUB()
model, collater, tokenizer, scheme = checkpoint
```
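Optionally, confirm the checkpoint loaded as expected and put the model in inference mode. This is standard PyTorch housekeeping, not an EvoDiff-specific step:

```python
# Disable dropout for inference and report a rough model size as a sanity check
model = model.eval()
n_params = sum(p.numel() for p in model.parameters())
print(f"Loaded {type(model).__name__} with {n_params:,} parameters")
```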

### H Chain

```python
path_to_msa = './sequence_generation/inputs/PD1_Hchains_aligned.a3m'
n_sequences = 33           # number of sequences in the MSA to subsample
seq_length = 200           # maximum sequence length to subsample
selection_type = 'random'  # or 'MaxHamming'; MSA subsampling scheme

tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
    path_to_msa, model, tokenizer, n_sequences, seq_length,
    device=0, selection_type=selection_type,
)

# Strip gap ('-') and pad ('!') tokens from the generated query sequence
print("New H chain sequence (gap and pad tokens removed):",
      re.sub('[!-]', '', generated_sequence[0][0]))
```

### L Chain

```python
path_to_msa = './sequence_generation/inputs/PD1_Lchains_aligned.a3m'
n_sequences = 33           # number of sequences in the MSA to subsample
seq_length = 200           # maximum sequence length to subsample
selection_type = 'random'  # or 'MaxHamming'; MSA subsampling scheme

tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(
    path_to_msa, model, tokenizer, n_sequences, seq_length,
    device=0, selection_type=selection_type,
)

# Strip gap ('-') and pad ('!') tokens from the generated query sequence
print("New L chain sequence (gap and pad tokens removed):",
      re.sub('[!-]', '', generated_sequence[0][0]))
```