Skip to content

Commit

Permalink
small changes
Browse files: browse the repository at this point in the history
  • Loading branch information
“Marcel-Mueck” committed Oct 16, 2023
1 parent 7cdb6ab commit 22b979a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
19 changes: 10 additions & 9 deletions deeprvat/annotations/annotations.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1090,9 +1090,12 @@ def process_deepripe(deepripe_df:object, column_prefix:str)->object:
deepripe_df.drop_duplicates(subset=["chrom", "pos", "ref", "alt"], inplace=True)
return deepripe_df

def process_vep(vep_file:object)->object:
vep_file[["chrom", "pos", "ref", "alt"]] = vep_file["#Uploaded_variation"].str.split(
":", expand=True
def process_vep(vep_file: object) -> object:
vep_file[["chrom", "pos", "ref", "alt"]] = (
vep_file["#Uploaded_variation"]
.str.replace("_", ":")
.str.replace("/", ":")
.str.split(":", expand=True)
)

vep_file["pos"] = vep_file["pos"].astype(int)
Expand Down Expand Up @@ -1136,16 +1139,14 @@ def concat_annotations(pvcf_blocks_file:str, annotation_dir:str, filename_patter
]
for f in tqdm(file_paths):
logger.info(f"processing file {f}")
file = pd.read_parquet(f)
logger.info(file.shape)
logger.info(file.columns)

if f == file_paths[0]:
logger.info("creating new file")
file = pd.read_parquet(f)
logger.info(file.shape)
logger.info(file.columns)
file.to_parquet(out_file, engine= "fastparquet")
else:
file = pd.read_parquet(f)
logger.info(file.shape)
logger.info(file.columns)
try:
file.to_parquet(out_file, engine= "fastparquet", append=True)
except ValueError:
Expand Down
3 changes: 2 additions & 1 deletion deeprvat_annotations.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,4 +18,5 @@ dependencies:
- fastparquet=2023.4.0
#comment out lines below if you want to use preinstalled bcftools or samtools
- bcftools=1.17
- samtools=1.17
- samtools=1.17
- ensembl-vep=110.1

0 comments on commit 22b979a

Please sign in to comment.