diff --git a/docs/DeepGenomeScan_model_list.rmd b/docs/DeepGenomeScan_model_list.rmd
new file mode 100644
index 0000000..3c2c9dd
--- /dev/null
+++ b/docs/DeepGenomeScan_model_list.rmd
@@ -0,0 +1,21 @@
+
+```{r library, include=FALSE}
+library(DT)
+library("DeepGenomeScan")
+f <- system.file('extdata',package='DeepGenomeScan')
+infile <- file.path(f, "GenomeScan_Model_list.csv")
+model_info=read.csv(infile)
+
+
+```
+
+# Available Models
+
+The models below are available in `DeepGenomeScan`.
+
+```{r list_table, echo = FALSE}
+datatable(model_info, rownames = FALSE, style = "bootstrap",
+          colnames = c("Model", "`method` Value", "Type", "Libraries", "Tuning Parameters"),
+          options = list(lengthMenu = c(nrow(model_info), 5, 10, 15, 20),
+                         scrollX = TRUE))
+```
\ No newline at end of file
diff --git a/docs/articles/Activation_functions.html b/docs/articles/Activation_functions.html
index 2eb36d1..1376bcc 100644
--- a/docs/articles/Activation_functions.html
+++ b/docs/articles/Activation_functions.html
@@ -98,8 +98,8 @@

Available activation functions

The activation functions below are available in the “RSNNS-based” models in DeepGenomeScan.

-
- +
+
diff --git a/docs/articles/Deep Learning Architecture Construction.html b/docs/articles/Deep Learning Architecture Construction.html
index cfa4801..ac0bc42 100644
--- a/docs/articles/Deep Learning Architecture Construction.html
+++ b/docs/articles/Deep Learning Architecture Construction.html
@@ -85,28 +85,16 @@

7/11/2020

-
+

-DeepGenomeScan for detection of natural selection

-

DeepGenomeScan implements the genome scan and genome-wide association studies via deep learing. DeepGenomeScan particularly good at detecting the subtle and weak signatures that are hard to identify by the linear approaches. DeepGenomeScan has several ready-to-use models constructed with typical architectures supported by different backends. As deep neural networks are highly computationally costly, choosing or constructing the architecture of deep learning depends on users’ data and computation demand. Therefore, if you know well about R or good at programming, we recommend you constructing your own model based on your demand.

+DeepGenomeScan for detecting natural selection +

DeepGenomeScan implements genome scans and genome-wide association studies via deep learning. DeepGenomeScan is particularly good at detecting the subtle and weak signatures that are hard to identify with common linear approaches, such as the linear model, the generalized linear model (GLM), PCA, RDA, LFMM, etc. DeepGenomeScan ships several ready-to-use models constructed with typical architectures supported by different backends. Because deep neural networks are computationally costly, choosing or constructing a deep learning architecture depends on the user's data and computational demand. Therefore, if you know R well or are good at programming, we recommend constructing your own model based on your needs.

In what follows, we provide the step-by-step deep learning architecture construction and genome scan implementation using DeepGenomeScan.

Install packages and dependencies

 
 if (!requireNamespace("DeepGenomeScan", quietly=TRUE))
 devtools::install_github("xinghuq/DeepGenomeScan")
-# Warning: replacing previous import 'keras::to_categorical' by
-# 'kerasR::to_categorical' when loading 'DeepGenomeScan'
-# Warning: replacing previous import 'keras::text_to_word_sequence' by
-# 'kerasR::text_to_word_sequence' when loading 'DeepGenomeScan'
-# Warning: replacing previous import 'keras::normalize' by
-# 'kerasR::normalize' when loading 'DeepGenomeScan'
-# Warning: replacing previous import 'keras::pad_sequences' by
-# 'kerasR::pad_sequences' when loading 'DeepGenomeScan'
-# Warning: replacing previous import 'caret::confusionMatrix' by
-# 'RSNNS::confusionMatrix' when loading 'DeepGenomeScan'
-# Warning: replacing previous import 'caret::train' by 'RSNNS::train' when
-# loading 'DeepGenomeScan'
 
 if (!requireNamespace("KLFDAPC", quietly=TRUE))
 devtools::install_github("xinghuq/KLFDAPC")
@@ -125,138 +113,34 @@ 

if (!requireNamespace("RNNS", quietly=TRUE)) install.packages("RNNS") -# Warning: package 'RNNS' is not available (for R version 3.6.1) if (!requireNamespace("RNNS", quietly=TRUE)) -install.packages("RNNS") -# Warning: package 'RNNS' is not available (for R version 3.6.1)

+install.packages("RNNS")

Our package incorporates several of the most commonly used deep learning libraries, such as Tensorflow, keras, RSNNS, H2O, and FCNN4R. This tutorial will show you how to construct your own deep learning model, tune it, and obtain the SNP importance scores.

 library(DeepGenomeScan)
 library(caret)### for ML calling functions and performance estimation
 library(keras) ### for DL
-library("tensorflow")
-library("caretEnsemble")
+library(tensorflow)
+library(caretEnsemble)
 library(kerasR)
-library("RSNNS")
 library(NeuralNetTools)

Preparing example data

-

The data used here is a single simulated data containing 640 individuals with 1000 SNPs that assumed under the environmental selection. There are 10 environmental factors acting on these population with different gradients. Details about the simulation and data can be found in Capblancq et al. 2018 (https://doi.org/10.1111/1755-0998.12906).

+

The data used here is a single simulated dataset containing 640 individuals genotyped at 1000 SNPs and assumed to be under environmental selection. Ten environmental factors with different gradients act on three QTLs in these populations. Details about the simulated data can be found in Capblancq et al. 2018 (https://doi.org/10.1111/1755-0998.12906).

 f <- system.file('extdata',package='DeepGenomeScan')
 infile <- file.path(f, "sim1.csv")
 sim_example=read.csv(infile)
 genotype=sim_example[,-c(1:14)]
 env=sim_example[,2:11]
-str(sim_example)
-# 'data.frame': 640 obs. of  1014 variables:
-#  $ pop_name: num  0 0 0 0 0 0 0 0 0 0 ...
-#  $ envir1  : num  2.62 2.62 2.62 2.62 2.62 ...
-#  $ envir2  : num  3.41 3.41 3.41 3.41 3.41 ...
-#  $ envir3  : num  1.82 1.82 1.82 1.82 1.82 ...
-#  $ envir4  : num  2.09 2.09 2.09 2.09 2.09 ...
-#  $ envir5  : num  2.65 2.65 2.65 2.65 2.65 ...
-#  $ envir6  : num  1.92 1.92 1.92 1.92 1.92 ...
-#  $ envir7  : num  2.75 2.75 2.75 2.75 2.75 ...
-#  $ envir8  : num  -0.529 -0.529 -0.529 -0.529 -0.529 ...
-#  $ envir9  : num  3.56 3.56 3.56 3.56 3.56 ...
-#  $ envir10 : num  8.36 8.36 8.36 8.36 8.36 ...
-#  $ qtrait1 : num  8.52 6.7 6.91 7 8.56 ...
-#  $ qtrait2 : num  5.7 6.17 4.43 10.1 6.36 ...
-#  $ qtrait3 : num  7.04 1.71 8.22 8.1 8.7 ...
-#  $ X       : int  2 2 1 0 1 2 0 1 0 0 ...
-#  $ X.1     : int  1 0 1 0 0 1 0 1 0 1 ...
-#  $ X.2     : int  1 1 0 1 0 1 0 1 2 2 ...
-#  $ X.3     : int  2 0 1 0 1 0 1 1 1 0 ...
-#  $ X.4     : int  1 0 2 1 2 1 1 2 1 1 ...
-#  $ X.5     : int  1 2 2 0 1 1 0 1 2 1 ...
-#  $ X.6     : int  1 1 1 1 1 1 0 1 1 1 ...
-#  $ X.7     : int  2 0 1 2 1 2 1 2 2 1 ...
-#  $ X.8     : int  2 1 1 1 1 2 1 1 2 0 ...
-#  $ X.9     : int  2 1 2 0 1 1 1 1 1 1 ...
-#  $ X.10    : int  0 1 1 1 1 1 0 1 1 1 ...
-#  $ X.11    : int  2 1 2 0 1 2 0 1 2 1 ...
-#  $ X.12    : int  1 0 1 1 1 0 1 2 1 0 ...
-#  $ X.13    : int  1 2 1 1 1 0 1 0 1 0 ...
-#  $ X.14    : int  1 1 1 1 0 0 1 0 1 0 ...
-#  $ X.15    : int  0 2 1 1 1 1 1 1 1 0 ...
-#  $ X.16    : int  2 0 2 1 1 1 1 1 1 2 ...
-#  $ X.17    : int  2 1 1 0 1 2 1 2 0 2 ...
-#  $ X.18    : int  1 1 0 0 0 1 0 1 1 2 ...
-#  $ X.19    : int  1 2 1 2 1 1 2 2 1 0 ...
-#  $ X.20    : int  0 1 1 0 1 1 1 0 1 1 ...
-#  $ X.21    : int  1 1 0 1 2 0 1 0 1 2 ...
-#  $ X.22    : int  1 2 1 1 0 1 1 2 1 1 ...
-#  $ X.23    : int  1 2 0 1 1 2 0 1 0 1 ...
-#  $ X.24    : int  1 2 2 1 0 1 2 0 1 1 ...
-#  $ X.25    : int  2 1 1 0 2 1 2 1 1 1 ...
-#  $ X.26    : int  2 1 1 1 2 2 1 2 1 2 ...
-#  $ X.27    : int  0 1 2 1 2 2 2 2 1 1 ...
-#  $ X.28    : int  1 0 1 2 2 1 2 1 1 1 ...
-#  $ X.29    : int  0 1 1 1 1 1 2 2 2 1 ...
-#  $ X.30    : int  0 2 1 1 0 1 2 1 1 1 ...
-#  $ X.31    : int  0 1 1 1 1 0 2 1 2 1 ...
-#  $ X.32    : int  0 0 1 1 1 1 1 0 1 2 ...
-#  $ X.33    : int  1 1 1 1 1 2 0 0 2 2 ...
-#  $ X.34    : int  1 1 0 0 0 1 0 1 0 1 ...
-#  $ X.35    : int  1 2 1 1 2 1 1 2 1 1 ...
-#  $ X.36    : int  0 1 0 0 0 1 1 1 0 1 ...
-#  $ X.37    : int  1 2 2 1 2 1 2 0 1 1 ...
-#  $ X.38    : int  2 1 1 1 1 2 0 1 1 1 ...
-#  $ X.39    : int  0 1 2 1 1 0 2 1 0 1 ...
-#  $ X.40    : int  2 1 2 0 2 1 1 1 1 1 ...
-#  $ X.41    : int  1 1 1 2 1 0 1 1 0 1 ...
-#  $ X.42    : int  1 2 2 1 1 1 2 1 2 2 ...
-#  $ X.43    : int  2 2 0 2 1 1 1 1 1 2 ...
-#  $ X.44    : int  1 1 2 1 2 0 1 0 0 0 ...
-#  $ X.45    : int  1 1 1 0 0 1 2 1 1 0 ...
-#  $ X.46    : int  0 1 0 0 1 1 0 1 1 2 ...
-#  $ X.47    : int  1 1 1 2 1 2 1 2 1 0 ...
-#  $ X.48    : int  2 2 1 2 1 1 1 2 0 2 ...
-#  $ X.49    : int  0 0 2 1 1 1 2 2 2 0 ...
-#  $ X.50    : int  0 1 0 1 0 1 0 1 1 1 ...
-#  $ X.51    : int  0 0 0 0 0 0 1 2 0 2 ...
-#  $ X.52    : int  1 1 1 0 1 1 1 1 1 1 ...
-#  $ X.53    : int  0 0 0 1 0 1 0 2 0 2 ...
-#  $ X.54    : int  1 1 1 2 1 0 1 1 1 1 ...
-#  $ X.55    : int  1 1 2 2 2 0 1 0 0 1 ...
-#  $ X.56    : int  1 1 1 0 2 1 1 2 1 2 ...
-#  $ X.57    : int  1 2 1 1 2 1 1 2 1 2 ...
-#  $ X.58    : int  1 0 0 0 0 0 1 1 0 0 ...
-#  $ X.59    : int  0 2 2 1 2 0 1 1 1 2 ...
-#  $ X.60    : int  0 0 0 1 2 1 0 1 1 1 ...
-#  $ X.61    : int  1 0 0 1 0 0 1 0 1 1 ...
-#  $ X.62    : int  2 2 2 1 0 0 1 2 2 1 ...
-#  $ X.63    : int  0 0 1 1 0 1 0 1 1 0 ...
-#  $ X.64    : int  1 0 0 1 1 1 0 0 0 0 ...
-#  $ X.65    : int  1 1 0 1 1 2 0 1 1 1 ...
-#  $ X.66    : int  0 1 0 1 1 1 0 1 1 1 ...
-#  $ X.67    : int  1 1 1 1 0 1 1 1 0 1 ...
-#  $ X.68    : int  1 1 1 1 2 1 1 2 1 0 ...
-#  $ X.69    : int  0 2 0 1 1 1 1 1 1 1 ...
-#  $ X.70    : int  0 1 0 1 0 2 1 1 1 0 ...
-#  $ X.71    : int  1 2 2 1 1 0 1 1 0 0 ...
-#  $ X.72    : int  0 2 1 0 1 2 1 1 1 1 ...
-#  $ X.73    : int  2 1 1 1 1 2 2 1 2 0 ...
-#  $ X.74    : int  1 1 1 2 0 0 1 0 0 1 ...
-#  $ X.75    : int  2 0 0 0 0 0 1 1 1 0 ...
-#  $ X.76    : int  1 0 2 1 1 2 1 1 0 1 ...
-#  $ X.77    : int  0 1 0 1 1 1 2 0 1 2 ...
-#  $ X.78    : int  1 1 0 0 1 0 1 0 0 1 ...
-#  $ X.79    : int  2 0 1 0 2 1 1 1 1 1 ...
-#  $ X.80    : int  1 0 0 1 1 1 1 0 0 0 ...
-#  $ X.81    : int  1 1 2 2 2 2 1 1 1 1 ...
-#  $ X.82    : int  1 2 1 1 1 1 2 2 1 1 ...
-#  $ X.83    : int  1 1 1 1 2 2 1 1 1 0 ...
-#  $ X.84    : int  2 1 2 0 1 0 1 1 1 2 ...
-#   [list output truncated]
+#str(sim_example)
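As a quick sanity check after loading the data (a minimal sketch; the expected dimensions follow the description above):

## Quick sanity check on the simulated data (objects created in the chunk above)
dim(genotype)        # expected: 640 individuals x 1000 SNPs
dim(env)             # expected: 640 individuals x 10 environmental factors
summary(env$envir1)  # gradient of the first environmental factor
table(unlist(genotype[, 1]))  # genotype counts (0/1/2) at the first SNP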

Basic model architecture construction

-

The principal steps for implementing user-defined model are, first constructing the model architecture and compiling the model to make sure the basic model works; and then next step integrating it to DeepGenomeScan framework. Now we construct a MLP model and complie it using keras first.

+

The principal steps for implementing a user-defined model are: first, construct the model architecture and compile it to make sure the basic model works; then, integrate the model into the DeepGenomeScan framework. Below, we construct an MLP model and compile it using keras.

 ###################################### example of MLP_keras_drop out##########################
 set.seed(123)
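For orientation, a minimal keras MLP with dropout of the kind compiled in this section might look like the sketch below; the layer sizes, dropout rate and optimizer settings are illustrative assumptions, and `build_mlp` is a hypothetical helper, not the exact architecture used in this tutorial.

## Illustrative keras MLP with dropout: 1000 SNPs in, one environmental variable out
build_mlp <- function(n_snps = 1000, units = c(64, 32), dropout = 0.25, lr = 0.001) {
  model <- keras_model_sequential() %>%
    layer_dense(units = units[1], activation = "relu", input_shape = c(n_snps)) %>%
    layer_dropout(rate = dropout) %>%
    layer_dense(units = units[2], activation = "relu") %>%
    layer_dropout(rate = dropout) %>%
    layer_dense(units = 1)   # linear output for a continuous environmental variable
  model %>% compile(loss = "mean_absolute_error",
                    optimizer = optimizer_rmsprop(lr = lr),  # 'learning_rate' on newer keras versions
                    metrics = "mean_absolute_error")
  model
}
## e.g., fit on the normalized genotypes created later in this tutorial:
# mlp <- build_mlp()
# history <- mlp %>% fit(as.matrix(genotype_norm), env$envir1,
#                        epochs = 50, batch_size = 32, validation_split = 0.2, verbose = 0)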
@@ -310,8 +194,8 @@ 

Compiling model using DeepGenomeScan

Compiling a deep learning model with DeepGenomeScan requires the user to provide four key components: 1. the model parameters and tuning values, 2. the deep learning model architecture and fitted model, 3. the model predict method/function or probability estimation (optional), and 4. the importance function.

-

In terms of the (1) model parameters and tuning values, users should set their own model parameters, parameter labels, and tuning methods (grid or random search), the range of the parameter values if grid, or the rule to search the parameter space if random based on the architecture of the model. With respect to (2) deep learning model architecture and fitted model, users should put their compiled (working) model (including the fitted model) under the same data list. In addition, users should also write the predict function for model tuning and importance function for estimating SNP importance after the optimal model is returned. All these components should store according to the below instruction.

-

Below we construct a MLP model and use DeepGenomeScan framework to compile it.

+

For (1) the model parameters and tuning values, users should define their own model parameters, parameter labels, and tuning method (grid or random search), plus the range of parameter values for grid search or the rule for sampling the parameter space for random search, based on the architecture of the model. For (2) the deep learning model architecture and fitted model, users should put their compiled (working) model, including the fitted model, in the same list. In addition, users should write the predict function used during model tuning and the importance function for estimating SNP importance after the optimal model is returned. All of these components should be stored and structured as shown below.

+

In what follows, we construct an MLP model and use the DeepGenomeScan framework to compile it; a skeleton of the required model list is sketched first, followed by the full example.
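As a rough skeleton, the four components can be organized in a caret-style model list, the structure DeepGenomeScan builds on. The element names below follow caret's custom-model convention; the bodies are placeholders rather than the exact model defined in this tutorial (the list is named `my_DL_model` here, and the tutorial's `modelmlpkerasdropout` follows the same layout).

## Skeleton of a user-defined model list (placeholder bodies; fill in your own architecture)
my_DL_model <- list(
  label   = "MLP with dropout (keras)",
  library = "keras",
  type    = "Regression",
  ## (1) tunable parameters and how to generate candidate values
  parameters = data.frame(parameter = c("units1", "dropout", "lr"),
                          class = rep("numeric", 3),
                          label = c("Hidden units", "Dropout rate", "Learning rate")),
  grid = function(x, y, len = NULL, search = "grid") {
    data.frame(units1 = sample(4:64, len, replace = TRUE),
               dropout = runif(len, 0, 0.5),
               lr = 10^runif(len, -4, -1))
  },
  ## (2) build, compile and fit the deep learning model; return the fitted model
  fit = function(x, y, wts, param, lev, last, classProbs, ...) {
    ## keras model construction goes here, using param$units1, param$dropout, param$lr
  },
  ## (3) prediction used during model tuning
  predict = function(modelFit, newdata, submodels = NULL) {
    predict(modelFit, as.matrix(newdata))
  },
  prob = NULL,
  ## (4) SNP importance computed after the optimal model is returned
  varImp = function(object, ...) {
    ## e.g. permutation importance; return a data.frame with one row per SNP
  }
)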

@@ -497,7 +381,7 @@

genotype_norm=as.data.frame(apply(genotype,2,normalize)) ### Doing DeepGenomeScan parameter tuning -model_Keras_mlp<- DeepGenomeScan(x=(genotype_norm),y=(env$envir1), +model_Keras_mlp<- DeepGenomeScan(genotype_norm,env$envir1, method=modelmlpkerasdropout, metric = "MAE",## "Accuracy", "RMSE","Rsquared" tuneLength = 10, ### number of tunable parameters to search, here this example uses just 10 for saving time. Users should set a large number in order to find a better model @@ -553,7 +437,7 @@

save.image(file = "MLP_Sim1_env1_test.RData") ###################################### working end###############################################

-

This is a complete process for doing deep learing based genome scan using user-defined model. Because keras uses python tensorflow as the backend, we recommend users configuring the python tensorflow, keras that compatible with R before constructing this model.

+

This is a complete process for running a deep learning based genome scan with a user-defined model. Because keras uses Python (TensorFlow) as the backend, we recommend configuring the Python tensorflow and keras installations properly, so that they are compatible with R, before constructing this model. A quick check is sketched below.
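One way to verify the configuration before building models (a minimal sketch using standard helper functions from reticulate, keras and tensorflow):

## Check that R can see a working Python / TensorFlow / Keras installation
library(reticulate)
library(tensorflow)
library(keras)
reticulate::py_config()       # which Python interpreter and modules are picked up
keras::is_keras_available()   # TRUE if the keras Python module can be loaded
tensorflow::tf_config()       # TensorFlow version and install location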

Below is a model constructed using RSNNS library.

@@ -669,7 +553,7 @@

model_RSNNS_mlp<- DeepGenomeScan(as.matrix(genotype_norm),env[,i], method=modelRSNNSmlpdecay, metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE" - tuneLength = 50, ### search 50 parameter combinations + tuneLength = 50, ### search 50 parameter combinations, users should increase the tune length to find a better model # tuneGrid=CNNGrid, ### or search 100 combinations of parameters using random tuneLength=100 verbose=0,# verbose=1 is reporting the progress,o is sclience trControl = econtrol1,importance = FALSE) @@ -684,24 +568,27 @@

Sys.time() -save.image(file = "Model_RSNNS_mlp_env1_sim_test_Final.RData")

-

This implementation is also not fast, after model tuning, we estimated the SNP importance score that under the selection of 10 environmental factors.

+save.image(file = "Model_RSNNS_mlp_env1_sim_test.RData") +

This implementation is also not fast. After model tuning, we estimated the SNP importance scores under the selection of the 10 environmental factors; a sketch of how the per-factor importance values can be collected is given below.
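Below is a rough sketch of how the per-factor importance values could be collected into one data frame before converting them to q-values; the loop layout and the assembly of `RSNNS_simimp` are assumptions, while the individual calls mirror the single-factor example above.

## Hypothetical loop over the 10 environmental factors, collecting SNP importance
RSNNS_simimp <- matrix(NA, nrow = ncol(genotype), ncol = ncol(env),
                       dimnames = list(colnames(genotype), colnames(env)))
for (i in seq_len(ncol(env))) {
  fit_i <- DeepGenomeScan(as.matrix(genotype_norm), env[, i],
                          method = modelRSNNSmlpdecay, metric = "RMSE",
                          tuneLength = 50, verbose = 0,
                          trControl = econtrol1, importance = FALSE)
  imp_i <- varImp(fit_i, scale = FALSE)
  RSNNS_simimp[, i] <- imp_i$importance$Overall
}
RSNNS_simimp <- as.data.frame(RSNNS_simimp)  # one importance column per factor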

-
+

-Ploting Manhattan plot

+Plotting Manhattan plot
+####
 
 Loci<-rep("Neutral", 1000)
 Loci[c(1,11,21,31,41,51,61,71,81,91)]<-"QTL1"
 Loci[c(101,111,121,131,141,151,161,171,181,191)]<-"QTL2"
 Loci[c(201,211,221,231,241,251,261,271,281,291)]<-"QTL3"
 
- scaled_imp=normalizeData(RSNNS_simimp1$importance,type = "0_1")
-SNPimp<-data.frame(index = c(1:1000), MLP= -log10(scaled_imp),Loci=Loci)
+### RSNNS_simimp is a data frame containing the SNP importance values from all 10 environmental factors (one importance column per factor); users should collect the SNP importance values into one dataset
 
+SNPimpq=DLqvalues(RSNNS_simimp,K=10)
+SNPimp<-data.frame(index = c(1:1000), MLP= -log10(SNPimpq),Loci=Loci)
 Selected_Loci=SNPimp$Loci[-which(SNPimp$Loci=="Neutral")]
 
+## Plotting Manhattan plot
 p1 <- ggplot() +
   geom_point(aes(x=SNPimp$index[-which(SNPimp$Loci!="Neutral")], y=SNPimp$MLP[-which(SNPimp$Loci!="Neutral")]), col = "gray83") +
   geom_point(aes(x=as.vector(SNPimp$index[-which(SNPimp$Loci=="Neutral")]), y=as.vector(SNPimp$MLP[-which(SNPimp$Loci=="Neutral")]), colour = Selected_Loci)) +
@@ -709,7 +596,7 @@ 

ylim(0,10) + theme_bw() p1

-

Manhattan plot not show here because of time.

+

The Manhattan plot is not shown here because of the long running time. Make sure to increase the tune length to find a good model, for example:
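A wider random search can be requested simply by raising tuneLength (a sketch; 200 combinations is an arbitrary choice and will take correspondingly longer to run):

## Wider random search over the tuning parameters (slower, but finds better models)
model_RSNNS_mlp_big <- DeepGenomeScan(as.matrix(genotype_norm), env$envir1,
                                      method = modelRSNNSmlpdecay, metric = "RMSE",
                                      tuneLength = 200, verbose = 0,
                                      trControl = econtrol1, importance = FALSE)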

@@ -722,7 +609,6 @@

data(neuraldat) set.seed(123) -### NeoralSen uses The importance of a given input is defined as the distribution of the derivatives of the output with respect to that input in each training data point. #################### ##custome two other activation functions, softplus and relu @@ -748,13 +634,13 @@

registerDoParallel(cl) multi_net1 = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10) ,stepmax=1e8,rep=1, err.fct = "sse",act.fct="logistic",linear.output =TRUE) ### it takes within a minute to finish and converge using the older version. -multi_net = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e+11 ,rep=1, err.fct = "sse",act.fct=softplus,linear.output =TRUE) ### it takes within a minute to finish and converge using the older version. +multi_net2 = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e+11 ,rep=1, err.fct = "sse",act.fct=softplus,linear.output =TRUE) ### it takes within a minute to finish and converge using the older version. save(multi_net,file = "neuralnet_1_example_softplus.RData") -multi_net = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e+11 ,rep=1, err.fct = "sse",act.fct=customRelu,linear.output =TRUE) ### it takes within a minute to finish and converge using the older version. +multi_net3 = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e+11 ,rep=1, err.fct = "sse",act.fct=customRelu,linear.output =TRUE) ### it takes within a minute to finish and converge using the older version. ### latest version can use relu -multi_net = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e9 ,rep=1, act.fct="relu",output.act.fct=logistic,linear.output =FALSE) ### some cases,takes longer, I do not know why since I installed the developement version +multi_net4 = neuralnet(simf, algorithm= "rprop+", data=sim_example, hidden = c(6,9,10,11) ,stepmax=1e9 ,rep=1, act.fct="relu",output.act.fct=logistic,linear.output =FALSE) ### some cases,takes longer, I do not know why since I installed the developement version olden(multi_net1,bar_plot = FALSE) stopCluster(cl) @@ -1629,7 +1515,7 @@

## repeated ten times repeats = 5,search = "random") genotype_norm=as.data.frame(apply(genotype,2,normalize)) -model_Keras_CNN<- DeepGenomeScan(x=(genotype_norm),y=(env$envir1), +model_Keras_CNN<- DeepGenomeScan(genotype_norm,env$envir1, method=CNNsgd, metric = "MAE",## "Accuracy", "RMSE","Rsquared" tuneLength = 10, ### 11 tunable parameters 11^2 @@ -1684,7 +1570,7 @@

VarImps_nullcnn=Tensor_sim_imp_NULL_cnn$CNN_Decrease_acc save.image(file = "CNN_Sim1_env1_test.RData")

-

We demonstrated how to construct, compile and implement deep learing based genome scan step by step using different libraries. These models are also ready-to-use in our DeepGenomeScan framework. Users just set the name of the method to choose the corresponding model. The models available can be found in our documentation and “DeepGenomeScan Models” section.

+

We demonstrated how to construct, compile and implement deep learning based genome scans step by step using different libraries. These models are also ready to use in our DeepGenomeScan framework; users just need to change the “method” name to choose the corresponding model. The available models can be found in our documentation and in the “DeepGenomeScan Models” section.

@@ -1703,15 +1589,15 @@

 set.seed(999)
 options(warn=-1)
-mod <- DeepGenomeScan(Y1 ~ X1 + X2 + X3, method = 'mlpWeightDecayML', data = neuraldat, linout = TRUE,metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
+mod <- DeepGenomeScan(genotype_norm, env$envir1, method = 'mlpWeightDecayML', linout = TRUE, metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
                tuneLength = 20, ### 20 parameter combinations to search
                # tuneGrid=CNNGrid, ### or search 100 combinations of parameters using random tuneLength=100
                verbose=0, # verbose=1 reports the progress, 0 is silent
                trControl = econtrol1,importance = FALSE)

 varImp(mod)
-
-mod1 <- DeepGenomeScan(Y1 ~ X1 + X2 + X3, method = 'neuralnet', data = neuraldat, linout = TRUE,metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
+simf=as.formula(paste(colnames(env)[i],paste(names(sim_example[,15:1014]), collapse="+"),sep="~"))
+mod1<- DeepGenomeScan(simf, data=sim_example, method = 'neuralnet', linout = TRUE, metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
                tuneLength = 20, ### 20 parameter combinations to search
                # tuneGrid=CNNGrid, ### or search 100 combinations of parameters using random tuneLength=100
                verbose=0, # verbose=1 reports the progress, 0 is silent
@@ -1720,8 +1606,8 @@

varImp(mod1) - -mod2 <- DeepGenomeScan(Y1 ~ X1 + X2 + X3, method = 'mlpneuralnet', data = neuraldat, linout = TRUE,metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE" +simf=as.formula(paste(colnames(env)[i],paste(names(sim_example[,15:1014]), collapse="+"),sep="~")) + mod2<- DeepGenomeScan(simf,data=sim_example, method = 'mlpneuralnet', linout = TRUE,metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE" tuneLength = 20, ### 11 tunable parameters 11^2 # tuneGrid=CNNGrid, ### or search 100 combinations of parameters using random tuneLength=100 verbose=0,# verbose=1 is reporting the progress,o is sclience diff --git a/docs/articles/DeepGenomeScan_model_list.html b/docs/articles/DeepGenomeScan_model_list.html index c55ae8a..54535a8 100644 --- a/docs/articles/DeepGenomeScan_model_list.html +++ b/docs/articles/DeepGenomeScan_model_list.html @@ -97,8 +97,8 @@

Available Models

The models below are available in DeepGenomeScan.

-
- +
+
diff --git a/docs/articles/home.html b/docs/articles/home.html
index 9aaab0e..91e51b9 100644
--- a/docs/articles/home.html
+++ b/docs/articles/home.html
@@ -86,7 +86,7 @@

DeepGenomeScan : A Deep Learning Approach for Whole Genome Scan (WGS) and Genome-Wide Association Studies (GWAS)

-

This package implements the genome scan and genome-wide association studies using deep neural networks (i.e, Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers heuristic learning and computational design integrating deep learning (i.e.,Multi-Layer Perceptron (MLP), convolutional neural network(CNN)), robust resampling and cross validations methods, as well as Model-Agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting variations under natural selection or omics-based association studies, such as GWAS, PWAS, TWAS, MWAS. The package use the tensorflow as the backend. The design makes the implemention user-friendly. Users can adopt the package’s framework to study various ecological and evolutionary questions, but not only constraining in biology field.

+

This package implements genome scans and genome-wide association studies using deep neural networks (e.g., the Multi-Layer Perceptron (MLP) and the Convolutional Neural Network (CNN)). DeepGenomeScan offers a heuristic computational framework integrating different neural network architectures (e.g., MLP, CNN) and robust resampling methods, as well as model-agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words deep learning for genome-wide scanning, is a deep learning approach for detecting signatures of natural selection and for performing omics-based genome-wide association studies, such as GWAS, PWAS, TWAS, and MWAS. The design makes the implementation user-friendly. It is compatible with most self-defined machine learning models (a self-defined model should be complete, including its tunable parameters, fitted model, and predict method; examples can be found in our tutorial). Users can adopt the package’s framework to study various evolutionary questions.

@@ -102,15 +102,18 @@

Note: Environment requirements: Python should be installed, and the Python packages Keras and TensorFlow should also be installed and working properly on the system

+###DA and KLFDAPC are used for SpGenomeScan
+
+ if (!requireNamespace("DA", quietly=TRUE))
+ 
+  devtools::install_github("xinghuq/DA")
+
 requireNamespace("KLFDAPC")
 
  if (!requireNamespace("KLFDAPC", quietly=TRUE))
 
   devtools::install_github("xinghuq/KLFDAPC")
   
- if (!requireNamespace("DA", quietly=TRUE))
- 
-  devtools::install_github("xinghuq/DA")
 
 if (!requireNamespace("keras", quietly=TRUE))
 
@@ -137,31 +140,18 @@ 

checking if Tensorflow works properly K0=keras::backend()

-
+

-library

- -
## Warning: replacing previous import 'keras::to_categorical' by
-## 'kerasR::to_categorical' when loading 'DeepGenomeScan'
-
## Warning: replacing previous import 'keras::text_to_word_sequence' by
-## 'kerasR::text_to_word_sequence' when loading 'DeepGenomeScan'
-
## Warning: replacing previous import 'keras::normalize' by
-## 'kerasR::normalize' when loading 'DeepGenomeScan'
-
## Warning: replacing previous import 'keras::pad_sequences' by
-## 'kerasR::pad_sequences' when loading 'DeepGenomeScan'
-
## Warning: replacing previous import 'caret::confusionMatrix' by
-## 'RSNNS::confusionMatrix' when loading 'DeepGenomeScan'
-
## Warning: replacing previous import 'caret::train' by 'RSNNS::train' when
-## loading 'DeepGenomeScan'
+Loading libraries
-library(caret)### for ML calling functions and performance estimation
-library(keras) ### for DL
+library("DeepGenomeScan")
+library("caret")### for ML calling functions and performance estimation
+library("keras") ### for DL
 library("tensorflow")
 library("caretEnsemble")
-library(kerasR)
+library("kerasR")
 library("RSNNS")
-library(NeuralNetTools)
+library("NeuralNetTools")

@@ -193,7 +183,7 @@

DeepGenomeScan with “mlp” model

-h2o_mlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
+GSmlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
                                   method="mlp",
                                   metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
                                   tuneLength = 10, ### 11 tunable parameters 11^2
@@ -201,12 +191,17 @@ 

trControl = econtrol1) #### varIMP for SNPs -out=varImp(h2o_mlp,scale = FALSE)

+out=varImp(GSmlp,scale = FALSE)
Plot the SNP importance scores
-plot(out$Overall,  ylab="SNP importance")
+#### Plot only the importance; remember to scan all environment factors and use the DLqvalues function to convert the multi-effect importance values to q-values
+### here we only plot the importance for one environmental factor as an example
+scaled_imp=normalizeData(out$importance$Overall,type = "0_1")
+SNPimp<-data.frame(index = c(1:1000), MLP= -log10(scaled_imp))
+plot(y=SNPimp$MLP, x=1:1000L, ylab="SNP importance")
+abline(h=2, col="blue")

Any feedback and pull requests are welcome.

diff --git a/docs/index.html b/docs/index.html
index f480831..2f641ab 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -78,33 +78,33 @@

DeepGenomeScan : A Deep Learning Approach for Whole Genome Scan (WGS) and Genome-Wide Association Studies (GWAS)

-

This package implements the genome scan and genome-wide association studies using deep neural networks (i.e, Multi-Layer Perceptron (MLP), Convolutional Neural Network (CNN)). DeepGenomeScan offers heuristic learning and computational design integrating deep learning (i.e.,Multi-Layer Perceptron (MLP), convolutional neural network(CNN)), robust resampling and cross validations methods, as well as Model-Agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words, deep learning for genome-wide scanning, is a deep learning approach for detecting variations under natural selection or omics-based association studies, such as GWAS, PWAS, TWAS, MWAS. The package use the tensorflow as the backend. The design makes the implemention user-friendly. Users can adopt the package’s framework to study various ecological and evolutionary questions, but not only constraining in biology field.

+

This package implements genome scans and genome-wide association studies using deep neural networks (e.g., the Multi-Layer Perceptron (MLP) and the Convolutional Neural Network (CNN)). DeepGenomeScan offers a heuristic computational framework integrating different neural network architectures (e.g., MLP, CNN) and robust resampling methods, as well as model-agnostic interpretation of feature importance for convolutional neural networks. DeepGenomeScan, in other words deep learning for genome-wide scanning, is a deep learning approach for detecting signatures of natural selection and for performing omics-based genome-wide association studies, such as GWAS, PWAS, TWAS, and MWAS. The design makes the implementation user-friendly. It is compatible with most self-defined machine learning models (a self-defined model should be complete, including its tunable parameters, fitted model, and predict method; examples can be found in our tutorial). Users can adopt the package’s framework to study various evolutionary questions.

Install packages

-

-library("devtools")
-
+
library("devtools")
 devtools::install_github("xinghuq/DeepGenomeScan")
-
-devtools::install_github("xinghuq/CaretPlus/pkg/caret")
-
+devtools::install_github("xinghuq/CaretPlus/pkg/caret")

Dependencies and environment requirements

Checking the python environment

-

-library("rappdirs")
+
library("rappdirs")
 library("reticulate")
 reticulate::use_python("/usr/bin/python3")
 library(caret) ### for ML calling functions and performance estimation, users should use the modified version at xinghuq/CaretPlus/caret instead of the original version
 library(keras)  
 library("tensorflow")
 
-##checking if Tensorflow works properly
+## checking if Tensorflow works properly
 K0=keras::backend()
 
-
+
@@ -154,33 +153,29 @@

Preparing data

-

-f <- system.file('extdata',package='DeepGenomeScan')
+
f <- system.file('extdata',package='DeepGenomeScan')
 infile <- file.path(f, "sim1.csv")
 sim_example=read.csv(infile)
 genotype=sim_example[,-c(1:14)]
 env=sim_example[,2:11]
-str(sim_example)
-
+str(sim_example)

Setting the resampling method

-

-econtrol1 <- trainControl(## 5-fold CV, repeat 5 times
+
econtrol1 <- trainControl(## 5-fold CV, repeat 5 times
   method = "adaptive_cv",
   number = 5,
   ## repeated ten times
   repeats = 5,search = "random")
 set.seed(999)
-options(warn=-1
-
+options(warn=-1)

DeepGenomeScan with “mlp” model


-h2o_mlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
+GSmlp<- DeepGenomeScan(as.matrix(genotype_norm),env$envir1,
                                   method="mlp",
                                   metric = "RMSE",## "Accuracy", "RMSE","Rsquared","MAE"
                                   tuneLength = 10, ### 11 tunable parameters 11^2
@@ -188,15 +183,18 @@ 

trControl = econtrol1) #### varIMP for SNPs - -out=varImp(h2o_mlp,scale = FALSE) +out=varImp(GSmlp,scale = FALSE)

Plot the SNP importance scores
-

-plot(out$Overall,  ylab="SNP importance")
-
+
+#### Plot only the importance; remember to scan all environment factors and use the DLqvalues function to convert the multi-effect importance values to q-values
+### here we only plot the importance for one environmental factor as an example
+ scaled_imp=normalizeData(out$importance$Overall,type = "0_1")
+SNPimp<-data.frame(index = c(1:1000), MLP= -log10(scaled_imp))
+plot(y=SNPimp$MLP, x=1:1000L, ylab="SNP importance")
+abline(h=2, col="blue")
+
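As the comments above note, a full scan runs over all 10 environmental factors; a sketch of the final conversion to q-values (the object name all_imp is hypothetical) could look like:

## Combine the per-factor importance columns and convert them to q-values
# all_imp <- data.frame(imp_envir1, imp_envir2, ...)  # one importance column per factor
# SNP_q   <- DLqvalues(all_imp, K = 10)               # returns p.values, q.values and padj
# sum(SNP_q$q.values < 0.01)                          # number of candidate SNPs at q < 0.01
# plot(-log10(SNP_q$q.values), ylab = "-log10(q-value)"); abline(h = 2, col = "blue")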

Any feedback and pull requests are welcome.

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index db19029..1f08fe2 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -6,7 +6,7 @@ articles:
   Deep Learning Architecture Construction: Deep Learning Architecture Construction.html
   DeepGenomeScan_model_list: DeepGenomeScan_model_list.html
   home: home.html
-last_built: 2020-11-08T20:56Z
+last_built: 2020-11-13T00:02Z
 urls:
   reference: http://github.com/xinghuq/DeepGenomeScan/reference
   article: http://github.com/xinghuq/DeepGenomeScan/articles
diff --git a/docs/reference/DLqvalues.html b/docs/reference/DLqvalues.html
index 3f37008..f3b43b4 100644
--- a/docs/reference/DLqvalues.html
+++ b/docs/reference/DLqvalues.html
@@ -229,7 +229,7 @@

Examples
#> return(data.frame(p.values = reschi2test, q.values = q.values_DL,
#>     padj = padj))
#> }
-#> <environment: 0xdc19b90>

+#> <environment: 0xc545c50>