diff --git a/docs/DeepGenomeScan_model_list.rmd b/docs/DeepGenomeScan_model_list.rmd new file mode 100644 index 0000000..3c2c9dd --- /dev/null +++ b/docs/DeepGenomeScan_model_list.rmd @@ -0,0 +1,21 @@ + +```{r library, include=FALSE} +library(DT) +library("DeepGenomeScan") +f <- system.file('extdata',package='DeepGenomeScan') +infile <- file.path(f, "GenomeScan_Model_list.csv") +model_info=read.csv(infile) + + +``` + +# Available Models + +The models below are available in `DeepGenomeScan`. + +```{r list_table, echo = FALSE} +datatable(model_info, rownames = FALSE, style = "bootstrap", + colnames = c("Model", "`method` Value", "Type", "Libraries", "Tuning Parameters"), + options = list(lengthMenu = c(nrow(model_info), 5, 10, 15, 20), + scrollX = TRUE)) +``` \ No newline at end of file diff --git a/docs/articles/Activation_functions.html b/docs/articles/Activation_functions.html index 2eb36d1..1376bcc 100644 --- a/docs/articles/Activation_functions.html +++ b/docs/articles/Activation_functions.html @@ -98,8 +98,8 @@
The activations below are available in model “RNNS-based” model in DeepGenomeScan
.
DeepGenomeScan implements the genome scan and genome-wide association studies via deep learing. DeepGenomeScan particularly good at detecting the subtle and weak signatures that are hard to identify by the linear approaches. DeepGenomeScan has several ready-to-use models constructed with typical architectures supported by different backends. As deep neural networks are highly computationally costly, choosing or constructing the architecture of deep learning depends on users’ data and computation demand. Therefore, if you know well about R or good at programming, we recommend you constructing your own model based on your demand.
+DeepGenomeScan for detecting natural selection +DeepGenomeScan implements genome scans and genome-wide association studies via deep learning. DeepGenomeScan is particularly good at detecting the subtle and weak signatures that are hard to identify with common linear approaches, such as linear models, generalized linear models (GLM), PCA, RDA, LFMM, etc. DeepGenomeScan has several ready-to-use models constructed with typical architectures supported by different backends. As deep neural networks are computationally costly, choosing or constructing the deep learning architecture depends on the user’s data and computational demands. Therefore, if you know R well or are good at programming, we recommend constructing your own model based on your needs.
In what follows, we provide the step-by-step deep learning architecture construction and genome scan implementation using DeepGenomeScan.
Install packages and dependences
if (!requireNamespace("DeepGenomeScan", quietly=TRUE)) devtools::install_github("xinghuq/DeepGenomeScan") -# Warning: replacing previous import 'keras::to_categorical' by -# 'kerasR::to_categorical' when loading 'DeepGenomeScan' -# Warning: replacing previous import 'keras::text_to_word_sequence' by -# 'kerasR::text_to_word_sequence' when loading 'DeepGenomeScan' -# Warning: replacing previous import 'keras::normalize' by -# 'kerasR::normalize' when loading 'DeepGenomeScan' -# Warning: replacing previous import 'keras::pad_sequences' by -# 'kerasR::pad_sequences' when loading 'DeepGenomeScan' -# Warning: replacing previous import 'caret::confusionMatrix' by -# 'RSNNS::confusionMatrix' when loading 'DeepGenomeScan' -# Warning: replacing previous import 'caret::train' by 'RSNNS::train' when -# loading 'DeepGenomeScan' if (!requireNamespace("KLFDAPC", quietly=TRUE)) devtools::install_github("xinghuq/KLFDAPC") @@ -125,138 +113,34 @@if (!requireNamespace("RNNS", quietly=TRUE)) install.packages("RNNS") -# Warning: package 'RNNS' is not available (for R version 3.6.1) if (!requireNamespace("RNNS", quietly=TRUE)) -install.packages("RNNS") -# Warning: package 'RNNS' is not available (for R version 3.6.1)
Our package incorporates several most commonly used deep learning libaries, such as Tensorflow, keras, RNNS, H2o, FCNN4R. This tutorial will show you how to construct your own deep learning model, tune the model, and get the SNP importance scores.
library(DeepGenomeScan) library(caret)### for ML calling functions and performance estimation library(keras) ### for DL -library("tensorflow") -library("caretEnsemble") +library(tensorflow) +library(caretEnsemble) library(kerasR) -library("RSNNS") library(NeuralNetTools)
The data used here is a single simulated data containing 640 individuals with 1000 SNPs that assumed under the environmental selection. There are 10 environmental factors acting on these population with different gradients. Details about the simulation and data can be found in Capblancq et al. 2018 (https://doi.org/10.1111/1755-0998.12906).
+The data used here is a single simulated dataset containing 640 individuals with 1000 SNPs that is assumed to be under environmental selection. There are 10 environmental factors selecting 3 QTLs in these populations with different gradients. Details about the simulated data can be found in Capblancq et al. 2018 (https://doi.org/10.1111/1755-0998.12906).
f <- system.file('extdata',package='DeepGenomeScan') infile <- file.path(f, "sim1.csv") sim_example=read.csv(infile) genotype=sim_example[,-c(1:14)] env=sim_example[,2:11] -str(sim_example) -# 'data.frame': 640 obs. of 1014 variables: -# $ pop_name: num 0 0 0 0 0 0 0 0 0 0 ... -# $ envir1 : num 2.62 2.62 2.62 2.62 2.62 ... -# $ envir2 : num 3.41 3.41 3.41 3.41 3.41 ... -# $ envir3 : num 1.82 1.82 1.82 1.82 1.82 ... -# $ envir4 : num 2.09 2.09 2.09 2.09 2.09 ... -# $ envir5 : num 2.65 2.65 2.65 2.65 2.65 ... -# $ envir6 : num 1.92 1.92 1.92 1.92 1.92 ... -# $ envir7 : num 2.75 2.75 2.75 2.75 2.75 ... -# $ envir8 : num -0.529 -0.529 -0.529 -0.529 -0.529 ... -# $ envir9 : num 3.56 3.56 3.56 3.56 3.56 ... -# $ envir10 : num 8.36 8.36 8.36 8.36 8.36 ... -# $ qtrait1 : num 8.52 6.7 6.91 7 8.56 ... -# $ qtrait2 : num 5.7 6.17 4.43 10.1 6.36 ... -# $ qtrait3 : num 7.04 1.71 8.22 8.1 8.7 ... -# $ X : int 2 2 1 0 1 2 0 1 0 0 ... -# $ X.1 : int 1 0 1 0 0 1 0 1 0 1 ... -# $ X.2 : int 1 1 0 1 0 1 0 1 2 2 ... -# $ X.3 : int 2 0 1 0 1 0 1 1 1 0 ... -# $ X.4 : int 1 0 2 1 2 1 1 2 1 1 ... -# $ X.5 : int 1 2 2 0 1 1 0 1 2 1 ... -# $ X.6 : int 1 1 1 1 1 1 0 1 1 1 ... -# $ X.7 : int 2 0 1 2 1 2 1 2 2 1 ... -# $ X.8 : int 2 1 1 1 1 2 1 1 2 0 ... -# $ X.9 : int 2 1 2 0 1 1 1 1 1 1 ... -# $ X.10 : int 0 1 1 1 1 1 0 1 1 1 ... -# $ X.11 : int 2 1 2 0 1 2 0 1 2 1 ... -# $ X.12 : int 1 0 1 1 1 0 1 2 1 0 ... -# $ X.13 : int 1 2 1 1 1 0 1 0 1 0 ... -# $ X.14 : int 1 1 1 1 0 0 1 0 1 0 ... -# $ X.15 : int 0 2 1 1 1 1 1 1 1 0 ... -# $ X.16 : int 2 0 2 1 1 1 1 1 1 2 ... -# $ X.17 : int 2 1 1 0 1 2 1 2 0 2 ... -# $ X.18 : int 1 1 0 0 0 1 0 1 1 2 ... -# $ X.19 : int 1 2 1 2 1 1 2 2 1 0 ... -# $ X.20 : int 0 1 1 0 1 1 1 0 1 1 ... -# $ X.21 : int 1 1 0 1 2 0 1 0 1 2 ... -# $ X.22 : int 1 2 1 1 0 1 1 2 1 1 ... -# $ X.23 : int 1 2 0 1 1 2 0 1 0 1 ... -# $ X.24 : int 1 2 2 1 0 1 2 0 1 1 ... -# $ X.25 : int 2 1 1 0 2 1 2 1 1 1 ... -# $ X.26 : int 2 1 1 1 2 2 1 2 1 2 ... 
-# $ X.27 : int 0 1 2 1 2 2 2 2 1 1 ... -# $ X.28 : int 1 0 1 2 2 1 2 1 1 1 ... -# $ X.29 : int 0 1 1 1 1 1 2 2 2 1 ... -# $ X.30 : int 0 2 1 1 0 1 2 1 1 1 ... -# $ X.31 : int 0 1 1 1 1 0 2 1 2 1 ... -# $ X.32 : int 0 0 1 1 1 1 1 0 1 2 ... -# $ X.33 : int 1 1 1 1 1 2 0 0 2 2 ... -# $ X.34 : int 1 1 0 0 0 1 0 1 0 1 ... -# $ X.35 : int 1 2 1 1 2 1 1 2 1 1 ... -# $ X.36 : int 0 1 0 0 0 1 1 1 0 1 ... -# $ X.37 : int 1 2 2 1 2 1 2 0 1 1 ... -# $ X.38 : int 2 1 1 1 1 2 0 1 1 1 ... -# $ X.39 : int 0 1 2 1 1 0 2 1 0 1 ... -# $ X.40 : int 2 1 2 0 2 1 1 1 1 1 ... -# $ X.41 : int 1 1 1 2 1 0 1 1 0 1 ... -# $ X.42 : int 1 2 2 1 1 1 2 1 2 2 ... -# $ X.43 : int 2 2 0 2 1 1 1 1 1 2 ... -# $ X.44 : int 1 1 2 1 2 0 1 0 0 0 ... -# $ X.45 : int 1 1 1 0 0 1 2 1 1 0 ... -# $ X.46 : int 0 1 0 0 1 1 0 1 1 2 ... -# $ X.47 : int 1 1 1 2 1 2 1 2 1 0 ... -# $ X.48 : int 2 2 1 2 1 1 1 2 0 2 ... -# $ X.49 : int 0 0 2 1 1 1 2 2 2 0 ... -# $ X.50 : int 0 1 0 1 0 1 0 1 1 1 ... -# $ X.51 : int 0 0 0 0 0 0 1 2 0 2 ... -# $ X.52 : int 1 1 1 0 1 1 1 1 1 1 ... -# $ X.53 : int 0 0 0 1 0 1 0 2 0 2 ... -# $ X.54 : int 1 1 1 2 1 0 1 1 1 1 ... -# $ X.55 : int 1 1 2 2 2 0 1 0 0 1 ... -# $ X.56 : int 1 1 1 0 2 1 1 2 1 2 ... -# $ X.57 : int 1 2 1 1 2 1 1 2 1 2 ... -# $ X.58 : int 1 0 0 0 0 0 1 1 0 0 ... -# $ X.59 : int 0 2 2 1 2 0 1 1 1 2 ... -# $ X.60 : int 0 0 0 1 2 1 0 1 1 1 ... -# $ X.61 : int 1 0 0 1 0 0 1 0 1 1 ... -# $ X.62 : int 2 2 2 1 0 0 1 2 2 1 ... -# $ X.63 : int 0 0 1 1 0 1 0 1 1 0 ... -# $ X.64 : int 1 0 0 1 1 1 0 0 0 0 ... -# $ X.65 : int 1 1 0 1 1 2 0 1 1 1 ... -# $ X.66 : int 0 1 0 1 1 1 0 1 1 1 ... -# $ X.67 : int 1 1 1 1 0 1 1 1 0 1 ... -# $ X.68 : int 1 1 1 1 2 1 1 2 1 0 ... -# $ X.69 : int 0 2 0 1 1 1 1 1 1 1 ... -# $ X.70 : int 0 1 0 1 0 2 1 1 1 0 ... -# $ X.71 : int 1 2 2 1 1 0 1 1 0 0 ... -# $ X.72 : int 0 2 1 0 1 2 1 1 1 1 ... -# $ X.73 : int 2 1 1 1 1 2 2 1 2 0 ... -# $ X.74 : int 1 1 1 2 0 0 1 0 0 1 ... -# $ X.75 : int 2 0 0 0 0 0 1 1 1 0 ... -# $ X.76 : int 1 0 2 1 1 2 1 1 0 1 ... 
-# $ X.77 : int 0 1 0 1 1 1 2 0 1 2 ... -# $ X.78 : int 1 1 0 0 1 0 1 0 0 1 ... -# $ X.79 : int 2 0 1 0 2 1 1 1 1 1 ... -# $ X.80 : int 1 0 0 1 1 1 1 0 0 0 ... -# $ X.81 : int 1 1 2 2 2 2 1 1 1 1 ... -# $ X.82 : int 1 2 1 1 1 1 2 2 1 1 ... -# $ X.83 : int 1 1 1 1 2 2 1 1 1 0 ... -# $ X.84 : int 2 1 2 0 1 0 1 1 1 2 ... -# [list output truncated]
The principal steps for implementing user-defined model are, first constructing the model architecture and compiling the model to make sure the basic model works; and then next step integrating it to DeepGenomeScan framework. Now we construct a MLP model and complie it using keras first.
+The principal steps for implementing a user-defined model are: first, constructing the model architecture and compiling the model to make sure the basic model works; and then integrating the model into the DeepGenomeScan framework. Below, we construct an MLP model and compile it using keras.
###################################### example of MLP_keras_drop out########################## set.seed(123) @@ -310,8 +194,8 @@
Compiling model using DeepGenomeScan
Compiling the deep learning model using DeepGenomeScan requires user to store four key components for the model: 1.Model parameters and tuning values, 2.deep learning model architecture and fitted model, 3.model predict method/function or probability estimation (optional), 4.importance function.
-In terms of the (1) model parameters and tuning values, users should set their own model parameters, parameter labels, and tuning methods (grid or random search), the range of the parameter values if grid, or the rule to search the parameter space if random based on the architecture of the model. With respect to (2) deep learning model architecture and fitted model, users should put their compiled (working) model (including the fitted model) under the same data list. In addition, users should also write the predict function for model tuning and importance function for estimating SNP importance after the optimal model is returned. All these components should store according to the below instruction.
-Below we construct a MLP model and use DeepGenomeScan framework to compile it.
+In terms of the (1) model parameters and tuning values, users should set their own model parameters, parameter labels, tuning methods (grid or random search), and the range of the parameter values if using grid search or the rule to search the parameter space if using random search, based on the architecture of the model. With respect to (2) the deep learning model architecture and fitted model, users should put their compiled (working) model (including the fitted model) under the same data list. In addition, users should also write the predict function for model tuning and the importance function for estimating SNP importance after the optimal model is returned. All these components should be stored/structured according to the instructions below.
+In what follows, we construct an MLP model and use the DeepGenomeScan framework to compile it.
This is a complete process for doing deep learing based genome scan using user-defined model. Because keras uses python tensorflow as the backend, we recommend users configuring the python tensorflow, keras that compatible with R before constructing this model.
+This is a complete process for performing a deep learning based genome scan using a user-defined model. Because keras uses python (tensorflow) as the backend, we recommend that users configure the python versions of tensorflow and keras properly so that they are compatible with R before constructing this model.
Below is a model constructed using RSNNS library.
This implementation is also not fast, after model tuning, we estimated the SNP importance score that under the selection of 10 environmental factors.
+save.image(file = "Model_RSNNS_mlp_env1_sim_test.RData") +This implementation is also not fast. After model tuning, we estimated the SNP importance scores under the selection of 10 environmental factors.
-+#### Loci<-rep("Neutral", 1000) Loci[c(1,11,21,31,41,51,61,71,81,91)]<-"QTL1" Loci[c(101,111,121,131,141,151,161,171,181,191)]<-"QTL2" Loci[c(201,211,221,231,241,251,261,271,281,291)]<-"QTL3" - scaled_imp=normalizeData(RSNNS_simimp1$importance,type = "0_1") -SNPimp<-data.frame(index = c(1:1000), MLP= -log10(scaled_imp),Loci=Loci) +### RSNNS_simimp is a dataframe containing all SNP importance values (10 importance for all SNPs) from 10 environmnetal factors, users should import the SNP importance into one dataset +SNPimpq=DLqvalues(RSSNS_simimp,K=10) +SNPimp<-data.frame(index = c(1:1000), MLP= -log10(SNPimpq),Loci=Loci) Selected_Loci=SNPimp$Loci[-which(SNPimp$Loci=="Neutral")] +## Plotting Manhattan plot p1 <- ggplot() + geom_point(aes(x=SNPimp$index[-which(SNPimp$Loci!="Neutral")], y=SNPimp$MLP[-which(SNPimp$Loci!="Neutral")]), col = "gray83") + geom_point(aes(x=as.vector(SNPimp$index[-which(SNPimp$Loci=="Neutral")]), y=as.vector(SNPimp$MLP[-which(SNPimp$Loci=="Neutral")]), colour = Selected_Loci)) + @@ -709,7 +596,7 @@ylim(0,10) + theme_bw() p1
Manhattan plot not show here because of time.
+The Manhattan plot is not shown here because of the long computation time. Make sure to increase the tune length to find a good model.
We demonstrated how to construct, compile and implement deep learing based genome scan step by step using different libraries. These models are also ready-to-use in our DeepGenomeScan framework. Users just set the name of the method to choose the corresponding model. The models available can be found in our documentation and “DeepGenomeScan Models” section.
+We demonstrated how to construct, compile and implement a deep learning based genome scan step by step using different libraries. These models are also ready-to-use in our DeepGenomeScan framework. Users just need to change the “method” name to choose the corresponding model. The available models can be found in our documentation and the “DeepGenomeScan Models” section.
The models below are available in DeepGenomeScan
.
This package implements the genome scan and genome-wide association studies using deep neural networks (i.e, Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers heuristic learning and computational design integrating deep learning (i.e.,Multi-Layer Perceptron (MLP), convolutional neural network(CNN)), robust resampling and cross validations methods, as well as Model-Agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting variations under natural selection or omics-based association studies, such as GWAS, PWAS, TWAS, MWAS. The package use the tensorflow as the backend. The design makes the implemention user-friendly. Users can adopt the package’s framework to study various ecological and evolutionary questions, but not only constraining in biology field.
+This package implements genome scans and genome-wide association studies using deep neural networks (i.e., Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers a heuristic computational framework integrating different neural network architectures (i.e., Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)) and robust resampling methods, as well as model-agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting signatures of natural selection and performing omics-based genome-wide association studies, such as GWAS, PWAS, TWAS, and MWAS. The design makes the implementation user-friendly. It is compatible with most self-defined machine learning models (the self-defined models should be complete, including tunable parameters, fitted model, and predicted model; examples can be found in our tutorial). Users can adopt the package’s framework to study various evolutionary questions.
+###DA and KLFDAPC are used for SpGenomeScan + + if (!requireNamespace("DA", quietly=TRUE)) + + devtools::install_github("xinghuq/DA") + requireNamespace("KLFDAPC") if (!requireNamespace("KLFDAPC", quietly=TRUE)) devtools::Install_github("xinghuq/KLFDAPC") - if (!requireNamespace("DA", quietly=TRUE)) - - devtools::install_github("xinghuq/DA") if (!requireNamespace("keras", quietly=TRUE)) @@ -137,31 +140,18 @@checking if Tensorflow works properly K0=keras::backend()
## Warning: replacing previous import 'keras::to_categorical' by
-## 'kerasR::to_categorical' when loading 'DeepGenomeScan'
-## Warning: replacing previous import 'keras::text_to_word_sequence' by
-## 'kerasR::text_to_word_sequence' when loading 'DeepGenomeScan'
-## Warning: replacing previous import 'keras::normalize' by
-## 'kerasR::normalize' when loading 'DeepGenomeScan'
-## Warning: replacing previous import 'keras::pad_sequences' by
-## 'kerasR::pad_sequences' when loading 'DeepGenomeScan'
-## Warning: replacing previous import 'caret::confusionMatrix' by
-## 'RSNNS::confusionMatrix' when loading 'DeepGenomeScan'
-## Warning: replacing previous import 'caret::train' by 'RSNNS::train' when
-## loading 'DeepGenomeScan'
+loading library
-library(caret)### for ML calling functions and performance estimation -library(keras) ### for DL +library("DeepGenomeScan") +library("caret")### for ML calling functions and performance estimation +library("keras") ### for DL library("tensorflow") library("caretEnsemble") -library(kerasR) +library("kerasR") library("RSNNS") -library(NeuralNetTools)
-h2o_mlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1, +GSmlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1, method="mlp", metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE" tuneLength = 10, ### 11 tunable parameters 11^2 @@ -201,12 +191,17 @@trControl = econtrol1) #### varIMP for SNPs -out=varImp(h2o_mlp,scale = FALSE)
-plot(out$Overall, ylab="SNP importance")
Welcome any feedback and pull request.
diff --git a/docs/index.html b/docs/index.html index f480831..2f641ab 100644 --- a/docs/index.html +++ b/docs/index.html @@ -78,33 +78,33 @@This package implements the genome scan and genome-wide association studies using deep neural networks (i.e, Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers heuristic learning and computational design integrating deep learning (i.e.,Multi-Layer Perceptron (MLP), convolutional neural network(CNN)), robust resampling and cross validations methods, as well as Model-Agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting variations under natural selection or omics-based association studies, such as GWAS, PWAS, TWAS, MWAS. The package use the tensorflow as the backend. The design makes the implemention user-friendly. Users can adopt the package’s framework to study various ecological and evolutionary questions, but not only constraining in biology field.
+This package implements genome scans and genome-wide association studies using deep neural networks (i.e., Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers a heuristic computational framework integrating different neural network architectures (i.e., Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)) and robust resampling methods, as well as model-agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting signatures of natural selection and performing omics-based genome-wide association studies, such as GWAS, PWAS, TWAS, and MWAS. The design makes the implementation user-friendly. It is compatible with most self-defined machine learning models (the self-defined models should be complete, including tunable parameters, fitted model, and predicted model; examples can be found in our tutorial). Users can adopt the package’s framework to study various evolutionary questions.
-library("devtools")
-
+library("devtools")
devtools::install_github("xinghuq/DeepGenomeScan")
-
-devtools::install_github("xinghuq/CaretPlus/pkg/caret")
-
+devtools::install_github("xinghuq/CaretPlus/pkg/caret")
requireNamespace("KLFDAPC")
+
+###DA and KLFDAPC are used for SpGenomeScan
+
+ if (!requireNamespace("DA", quietly=TRUE))
+
+ devtools::install_github("xinghuq/DA")
+
+requireNamespace("KLFDAPC")
if (!requireNamespace("KLFDAPC", quietly=TRUE))
devtools::Install_github("xinghuq/KLFDAPC")
- if (!requireNamespace("DA", quietly=TRUE))
-
- devtools::install_github("xinghuq/DA")
if (!requireNamespace("keras", quietly=TRUE))
@@ -116,35 +116,34 @@
if (!requireNamespace("kerasR", quietly=TRUE))
install.packages("kerasR")
-
+
-library("rappdirs")
+library("rappdirs")
library("reticulate")
reticulate::use_python("/usr/bin/python3")
library(caret) ### for ML calling functions and performance estimation, users should use the modified version at xinghuq/CaretPlus/caret instead of the original version
library(keras)
library("tensorflow")
-##checking if Tensorflow works properly
+checking if Tensorflow works properly
K0=keras::backend()
library(DeepGenomeScan)
-library(caret)### for ML calling functions and performance estimation
-library(keras) ### for DL
+loading library
+library("DeepGenomeScan")
+library("caret")### for ML calling functions and performance estimation
+library("keras") ### for DL
library("tensorflow")
library("caretEnsemble")
-library(kerasR)
+library("kerasR")
library("RSNNS")
-library(NeuralNetTools)
+library("NeuralNetTools")
-f <- system.file('extdata',package='DeepGenomeScan')
+f <- system.file('extdata',package='DeepGenomeScan')
infile <- file.path(f, "sim1.csv")
sim_example=read.csv(infile)
genotype=sim_example[,-c(1:14)]
env=sim_example[,2:11]
-str(sim_example)
-
+str(sim_example)
-econtrol1 <- trainControl(## 5-fold CV, repeat 5 times
+econtrol1 <- trainControl(## 5-fold CV, repeat 5 times
method = "adaptive_cv",
number = 5,
## repeated ten times
repeats = 5,search = "random")
set.seed(999)
-options(warn=-1
-
+options(warn=-1
-h2o_mlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
+GSmlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
method="mlp",
metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
tuneLength = 10, ### 11 tunable parameters 11^2
@@ -188,15 +183,18 @@
trControl = econtrol1)
#### varIMP for SNPs
-
-out=varImp(h2o_mlp,scale = FALSE)
+out=varImp(GSmlp,scale = FALSE)
-plot(out$Overall, ylab="SNP importance")
-
+#### Plot only the importance; remember to scan all environment factors and use the DLqvalues function to convert the multi-effect importance values to q-values
+### This example only plots the importance for a single environment factor
+ scaled_imp=normalizeData(out$importance$Overall,type = "0_1")
+SNPimp<-data.frame(index = c(1:1000), MLP= -log10(scaled_imp))
+plot(y=-SNPimp$MLP,x=1:1000L, ylab="SNP importance")
+abline(h=2, col="blue")
+
Welcome any feedback and pull request.