From 1f52736ea5ace362d79034aabfe67eeb675e036a Mon Sep 17 00:00:00 2001 From: cajac102 Date: Thu, 21 Nov 2024 11:08:56 +0100 Subject: [PATCH 1/3] fix modification regex to not include square brackets --- .../io/parsing/io_parse_settings/parse_settings_fragpipe.toml | 2 +- .../io/parsing/io_parse_settings/parse_settings_sage.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/proteobench/io/parsing/io_parse_settings/parse_settings_fragpipe.toml b/proteobench/io/parsing/io_parse_settings/parse_settings_fragpipe.toml index 38e64e5e..883d52e4 100644 --- a/proteobench/io/parsing/io_parse_settings/parse_settings_fragpipe.toml +++ b/proteobench/io/parsing/io_parse_settings/parse_settings_fragpipe.toml @@ -29,7 +29,7 @@ "before_aa" = false "isalpha" = true "isupper" = true -"pattern"="\\[([^]]+)\\]" +"pattern"="(?<=\\[).+?(?=\\])" "modification_dict" = {"57.0215" = "Carbamidomethyl", "57.0216" = "Carbamidomethyl", "15.9949" = "Oxidation", "-17.026548" = "Gln->pyro-Glu", "-18.010565" = "Glu->pyro-Glu", "42.0106" = "Acetyl"} [general] diff --git a/proteobench/io/parsing/io_parse_settings/parse_settings_sage.toml b/proteobench/io/parsing/io_parse_settings/parse_settings_sage.toml index 1ca587cd..cdd48c4a 100644 --- a/proteobench/io/parsing/io_parse_settings/parse_settings_sage.toml +++ b/proteobench/io/parsing/io_parse_settings/parse_settings_sage.toml @@ -29,7 +29,7 @@ "before_aa" = false "isalpha" = true "isupper" = true -"pattern"="\\[([^]]+)\\]" +"pattern"="(?<=\\[).+?(?=\\])" "modification_dict" = {"+57.0215" = "Carbamidomethyl", "+15.9949" = "Oxidation", "-17.026548" = "Gln->pyro-Glu", "-18.010565" = "Glu->pyro-Glu", "+42" = "Acetyl"} [general] From 9ce253a729397f1c5709d6b0486aa04e69212be4 Mon Sep 17 00:00:00 2001 From: cajac102 Date: Thu, 21 Nov 2024 12:46:10 +0100 Subject: [PATCH 2/3] parse charge to int for alphapept --- proteobench/io/parsing/parse_ion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/proteobench/io/parsing/parse_ion.py b/proteobench/io/parsing/parse_ion.py index 52835edc..c7e52ec4 100644 --- a/proteobench/io/parsing/parse_ion.py +++ b/proteobench/io/parsing/parse_ion.py @@ -14,6 +14,7 @@ def load_input_file(input_csv: str, input_format: str) -> pd.DataFrame: input_data_frame = pd.read_csv(input_csv, sep="\t", low_memory=False) elif input_format == "AlphaPept": input_data_frame = pd.read_csv(input_csv, low_memory=False) + input_data_frame["charge"] = input_data_frame["charge"].astype(int) elif input_format == "Sage": input_data_frame = pd.read_csv(input_csv, sep="\t", low_memory=False) elif input_format == "FragPipe": From f4ec6cb341782c266deda9e60ebb39e1121a70c1 Mon Sep 17 00:00:00 2001 From: RobbinBouwmeester Date: Fri, 22 Nov 2024 12:42:23 +0100 Subject: [PATCH 3/3] Update parse_ion.py --- proteobench/io/parsing/parse_ion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/proteobench/io/parsing/parse_ion.py b/proteobench/io/parsing/parse_ion.py index c7e52ec4..80dfcabd 100644 --- a/proteobench/io/parsing/parse_ion.py +++ b/proteobench/io/parsing/parse_ion.py @@ -13,8 +13,7 @@ def load_input_file(input_csv: str, input_format: str) -> pd.DataFrame: if input_format == "MaxQuant": input_data_frame = pd.read_csv(input_csv, sep="\t", low_memory=False) elif input_format == "AlphaPept": - input_data_frame = pd.read_csv(input_csv, low_memory=False) - input_data_frame["charge"] = input_data_frame["charge"].astype(int) + input_data_frame = pd.read_csv(input_csv, low_memory=False, dtype={"charge": int}) elif input_format == "Sage": input_data_frame = pd.read_csv(input_csv, sep="\t", low_memory=False) elif input_format == "FragPipe":