From 1f79110b15c2a6eea0904300907a45dbde39c073 Mon Sep 17 00:00:00 2001 From: jpquast Date: Wed, 21 Aug 2024 14:15:07 +0200 Subject: [PATCH] Update example and metal_list --- R/extract_metal_binders.R | 6 +++++- data-raw/metal_list.R | 24 ++++++++++++------------ data/metal_list.rda | Bin 1664 -> 1666 bytes 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/R/extract_metal_binders.R b/R/extract_metal_binders.R index b2758fdd..26f1a2c1 100644 --- a/R/extract_metal_binders.R +++ b/R/extract_metal_binders.R @@ -90,7 +90,8 @@ #' columns = c( #' "ft_binding", #' "cc_cofactor", -#' "cc_catalytic_activity" +#' "cc_catalytic_activity", +#' "keyword" #' ) #' ) #' @@ -690,11 +691,14 @@ extract_metal_binders <- function(data_uniprot, )) %>% dplyr::mutate(chebi_id = str_split(.data$chebi_id, pattern = ";")) %>% tidyr::unnest("chebi_id") %>% + # Use the atom id if chebi ion ID is not present dplyr::mutate(chebi_id = ifelse(is.na(.data$chebi_id), stats::setNames(metal_list$chebi_id, metal_list$name)[.data$keyword], .data$chebi_id )) %>% + # first use uniprot data to directly annotate IDs that don't have a formula with the correct metal dplyr::mutate(metal_id_part = stats::setNames(metal_chebi_uniprot$metal_atom_id, as.character(metal_chebi_uniprot$id))[.data$chebi_id]) %>% + # then use the metal_list to annotate the rest dplyr::mutate(metal_id_part = ifelse(is.na(.data$metal_id_part), stats::setNames(unlisted_metal_list$chebi_id, as.character(unlisted_metal_list$chebi_ion_id))[.data$chebi_id], .data$metal_id_part diff --git a/data-raw/metal_list.R b/data-raw/metal_list.R index 60924952..3752e7b2 100644 --- a/data-raw/metal_list.R +++ b/data-raw/metal_list.R @@ -513,25 +513,25 @@ metal_list <- data.frame( "33610", "29101", "39127", - "60272", + "63062", "37763", # elemental "29103", - "39124", + "39123", "155869", # elemental "190496", "35172", - "61310", + "33516", "25155", "24875", "23336", - "60248", + "25516", "23378", - "27365", + "63056", "84043", "30549;30550", "35826", # elemental "49847", - "39130", + "39129", "49978;49962", NA, # no zirconium ion "155922", # elemental @@ -540,22 +540,22 @@ metal_list <- data.frame( "30686;49862", NA, # no Rhodium ion NA, # no Palladium ion - "60247", + "60253", "63063", "49664", "30475;30476", "49867", "36660", # elemental "60270", - "39126", + "39125", "37317;49701", - "37265", # elemental + "37265", # elemental submitted: CHEBI:231845 "229784", "229785", NA, # no Promethium ion "49890", "49588;49591", - "37285", # elemental + "37285", # elemental submitted: CHEBI:231846 "49902", NA, # no Dysprosium ion "49650", @@ -573,8 +573,8 @@ metal_list <- data.frame( "33970", # elemental "25197", "49920;30439", - "60249", - "85543", + "60252", + "85544", NA, # no Polonium ion NA, # no Astatine ion "33502", diff --git a/data/metal_list.rda b/data/metal_list.rda index c1be86df2d542fe41c4a18ee84a9a2bb18a6259e..da710dd7c999522ac43babadde05f7725e64e7ac 100644 GIT binary patch delta 1655 zcmV--28j894T230LRx4!F+o`-Q&|mOxABn{zQKo#^}|rq&=Pu2DW**{ z(K2l`YGF+eQ$P8bjl9;QP;00^o0iIWg%PfbQfnE*5Z&;S4dpa3+;RD(chG-w$# z8Udit$%FvWqd?K2kTe0NK!in4Aq|oo|8sR4FCfW4FDPd14AIur+hPZPxz8aB$7y!l1U_zNhFfn zq2TylA5W>xHgxi+QKe3$TE)k+Xv=plnsMkNmHZfSW675`bo6OcSKtHy0N{*lXd|>e z>I#%aPYl1@pw|RN@>xX`^H!nMn{ak3tAAdwro|d-?cq`U!^cDIgYW#ma(aK<9%}(X z-<&ZwDfWsYx{y=~Z*EDyFYkYE@7tFR{p`D@-_#p3#ea=sjdw0aqmnTT&^<~1Yl{uZ zohrCt6fCtgP83Xq-4^Q+O!%diC!ptz=CGm5ZK4T94m^u_c5H~|+dTs+L3XeWB7e&l zR~(8nhehog8K|jJ&xw(BeXFBY6E3wkS2Fmtu3TH(L}6j@oJ}-Jnk>>-H0KxKJsTFat!CtxLLlkDdyPWYH2o0> zK^Mp+#m>a+BW#@=uy&kZEmTwQxG%C4`gIA0;J&5k)g}T6f-q(iYz8Be#(&NZjc}c9 zbT0286Q=JL{*nPJXR2@2dmbtU;tK^Z#9AyQAjqX61qcjfGAtyb0b^tYL=Xh!2w+*b zKMpGOsLes+6>jS;L2fAvIeKxI!dvm43=v`oBnL%nhIITmaNees9ujAV2tcMT?1oAO z<1nJ&K17pQMO3|mo{#92Ux7=>;^NYUSQ5N}!9miGHt zAc;F_6KhC0n@-tBE+!a&RyNIws(fm5N|93Yok}A2N^26NJ()1Frd7RV_M5w!7^8&q zsEa``qcIN-q~H@pf^vYSF+?G7m>__FkX8gOTNuXJOaND1b=R+(vwuFC>#f>pFfgbQ z5fA_X0001hfPjF3*a!#+2o@!YXJ5$#_ZNN95_!ymB%Zy8apR#t;IeX<$mxG@Ecawv z*%*MR=!k%>P@c?)5X7Ktyg=NDtCy^ylVZXbH0`vpZ$d<;w)rH&&u?yraAiM6>z-?n=DghQG+a;4NlBTgnxpOFggrzc82v{!4!;# z&p#efV-6j-DH_LPun<3ZwvBZ2HXOnz^^WGyo;m(=vsS=BTGPTnxhn+M0FZEzgB~zD7)+ zd?s8pIekwvqcK9$^;bVS>0c>(-P13w5H-(Na_6aV=6^cT)$CO)#CR^m(Vr#|1-%%Y z_sKCDn?%&TS)(~#dQ^*0uvsWdOnL5_i?c;UII~rv14f;!QiSC02RStPCZOekVMGE5 z?OPDGNValPqzOvpua-7zb1+*wQggv2^_cu^<>c_#l7;1K*6<7gn`J39(x`}n)`nyR zlon+rY+%XEFm#$T#p(b9Bo)w=~alZcny z(HbMGRXMsYQL3z4@6(IFzkOYe`}TCX&U3p@d*1iIV0nOnHc?ObyOJrwgn(-GzlZ_~ B>H7cx delta 1653 zcmV-*28#KD4S)>}LRx4!F+o`-Q&}Rg{aBF>Ab;IbOEWB>p(0K^S609N%?88TrQMga^=Moa(! zF@G2%0Wd};nqd-1LTWNLra%KA05kvq8UO%jWFOFG=Re{}B$7!YQb{C|NhFd>Zx5x@ z@j9Je9sUp?!tsX^FPEHW5+ur(GHmJeX;Z0IwR;}6ZCm#Dxpwvc^&*t1=_s5C_Q$zG zr6Q+(VcDU&2#ey!N-5;IL!Ug*b*{Hc;D4^2HPZ8BQT)ThN7onIXAK0`{{Os~!9m)a zu|AdciXyu3R0`PoNwqN8{$aJ3OC}o{Uxc6aQe6=b@zJur2pq_W!3t(?;cM z^`TZW+rqR9g2Q-EuxEw!y6WDkme-^U8bW(!R|TSSdECvduXyK}=m!x!$-qe#Sbw^> z<58SCFKE!sMM{=@OpB}Z?u}GTy42lV%i|cVs;aE!@t-heoMTUra}N#;rLC!8X7oeE z+-Yrf#pdxaX6b^=#DZCa5)Hfp*t4-?wl{f-ok-g?WtAMTC93Fzj%XY^>C}zU8{ueR zi%SV0$fZOIic9IJm&sHSn4nZHfM1e}k}lqs zYvm=2lvNeaR5+lpHULl&M?pvuK+ICjs|dpZ0sx5&9IF{6YgxIjHwv+9SD6D~G6fSQ zjJmfO5KQG2h}LffYm&2qR)3bOaL^8}u59mgW-$uWf*fKMxd|gjebhm{XKGv9?Dh~M zPS2v7T0y~_Cu}2^6AVBr8&+)#yL@V7$|@c+sYGLRrm-qR*^>(@Wn0!?X}h_hilm-( zn8FE#8HjjwCc!jVCnySI6hVfl&=NpINh-oOF|BK>Dj+JVs;bw`%YS!GRaE^nARtnp z00000002NhKtMoHAR!qU85tW_>JoTsX4MPkth|NtaPbzg#iIb=vT~Tn<^JGV?Z~&X zF#%E05dmDGJn|qz5`nVu19Bp+Ub2QwiwIoPv%-*B0yA{z$S|2n5Rge~$`Szh)s`3; zSuyKn=yGYmubo-Ctx&nrE(T47U`!O*A^)eOF`Po1bx6ygBV5 zMy`Fc!}}VMe6bNHKO_{woX@USDvk+M`D{;>XbrAlYJOr|WPcW!!KDT+V|P~t>k_s< zHQp?uwo9ITvl`bk9}uS7m6)whbm=+Yei-A7shzMWKpP2!Orf`e-d=2T?_a~y7*3Ir zW0PVJG0h}0BRej8j>ANyn5mZ`w=Vq%h&AfjmDJrnJB+@5?MSyFXhJWV6LhPfE6O`tQCx)PrxzW;-NI^kGBL+^_8fVppjLN{lhbax=W0~g_ z)`;q1y$TXJ%K)lC1;)>;&Nx>J?SlXT0!Yo~A|Xl;5P?r90pYS=yr&54kXBq$%k#+T zWe!f)pV641Y5L2XnR4GDd)?D7x)3$ZS90g8aOA8Czki;Yrz48xhYa~J04(Uj_(EC9 z6i7}~7n;$R1QfK%8%1)ArNy4&G2EWUIPm7DL`bx)ty;y>e?gfz@=uJ)^8&~Ok-3da z8!H>fLeeHHa(uBjTibfcxkhnI!o(jFxj6h4rN+#zgCGI`L#wGso@J2`V-nnel#qUi>B)vho!g?rJDM#f z)jFun?YECrICt>ThrPdjJ&pVJbiMC;`!Iwd2xs?<^bjVpDgPI8ML1B9BC!2f