-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/QUBIC@109560 bc3139a8-67e5-0310-9ffc-ced21a209358
- Loading branch information
d.tenenbaum
committed
Oct 13, 2015
0 parents
commit 7422dc2
Showing
45 changed files
with
3,417 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
Package: QUBIC | ||
Type: Package | ||
Title: An R package for qualitative biclustering in support of gene | ||
co-expression analyses | ||
Version: 0.99.0 | ||
Date: 2015-10-02 | ||
Authors@R: c(person("Yu", "Zhang", role = c("aut", "cre"), email = "zy26@jlu.edu.cn"), | ||
person("Qin", "Ma", role = "aut", email = "qin.ma@sdstate.edu")) | ||
Author: Yu Zhang <zy26@jlu.edu.cn>, | ||
Qin Ma <qin.ma@sdstate.edu> | ||
Maintainer: Yu Zhang <zy26@jlu.edu.cn> | ||
Depends: biclust | ||
Suggests: knitr | ||
Description: The core function of this R package is to provide the implementation of the well-cited and well-reviewed QUBIC algorithm, aiming to deliver an effective and efficient biclustering capability. | ||
This package also includes the following related functions: | ||
(i) a qualitative representation of the input gene expression data, obtained through a well-designed discretization method that considers the underlying data properties, which can be directly used in other biclustering programs; | ||
(ii) visualization of identified biclusters using heatmap in support of overall expression pattern analysis; | ||
(iii) bicluster-based co-expression network elucidation and visualization, where different correlation coefficient scores between a pair of genes are provided; and | ||
(iv) a generalized output format of the biclusters and the corresponding network, which can be freely downloaded so that a user can easily perform subsequent comprehensive functional enrichment analysis (e.g. DAVID) and advanced network visualization (e.g. Cytoscape). | ||
License: CC BY-NC-ND 4.0 | ||
Imports: Rcpp (>= 0.11.0) | ||
LinkingTo: Rcpp | ||
SystemRequirements: C++11, Rtools (>= 3.1) | ||
VignetteBuilder: knitr | ||
biocViews: StatisticalMethod, Microarray, DifferentialExpression, | ||
MultipleComparison, Clustering, Visualization | ||
NeedsCompilation: yes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
useDynLib(QUBIC) | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom(Rcpp, evalCpp) | ||
importFrom(biclust, biclust) | ||
importFrom(biclust, bicluster) | ||
importFrom(biclust, BiclustResult) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# This file was generated by Rcpp::compileAttributes | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
#' @rdname QUBIC | ||
#' @backref src/rcpp_qubic.cpp | ||
qubic <- function(matrix, r, q, c, o, f, k, P, S, C, verbose) {
  # R-level entry point for the compiled routine registered under the
  # symbol 'QUBIC_qubic'; every argument is forwarded unchanged, in order.
  .Call("QUBIC_qubic", matrix, r, q, c, o, f, k, P, S, C, verbose,
        PACKAGE = "QUBIC")
}
|
||
#' @rdname QUBIC | ||
#' @backref src/rcpp_qubic.cpp | ||
qubic_d <- function(matrix, c, o, f, k, P, S, C, verbose) {
  # R-level entry point for the compiled routine registered under the
  # symbol 'QUBIC_qubic_d'; every argument is forwarded unchanged, in order.
  .Call("QUBIC_qubic_d", matrix, c, o, f, k, P, S, C, verbose,
        PACKAGE = "QUBIC")
}
|
||
#' Create a qualitative discrete matrix for a given gene expression matrix | ||
#' | ||
#' \code{qudiscretize} delivers a discrete matrix. It is useful if we just want to get a discretized matrix. | ||
#' | ||
#' @details | ||
#' \code{qudiscretize} converts a given gene expression matrix to a discrete matrix. | ||
#' It is implemented in C++, providing an increase in speed over the C equivalent. | ||
#' | ||
#' @usage qudiscretize(x, r = 1L, q = 0.06) | ||
#' @inheritParams QUBIC | ||
#' | ||
#' @return A qualitative discrete matrix | ||
#' | ||
#' @name qudiscretize | ||
#' | ||
#' @aliases qudiscretize qdiscretize BCQU | ||
#' | ||
#' @examples | ||
#' # Qualitative discretize yeast microarray data | ||
#' data(BicatYeast) | ||
#' qudiscretize(BicatYeast[1:7, 1:5]) | ||
#' | ||
#' @seealso \code{\link{QUBIC}} \code{\link{discretize}} | ||
#' @backref src/rcpp_qubic.cpp | ||
qudiscretize <- function(x, r = 1L, q = 0.06) {
  # Pass the input matrix and both settings straight to the compiled
  # routine registered under the symbol 'QUBIC_qudiscretize'.
  .Call("QUBIC_qudiscretize", x, r, q, PACKAGE = "QUBIC")
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,241 @@ | ||
###################################################################### | ||
#' QUBIC: A Qualitative Biclustering Algorithm for Analyses of Gene Expression Data | ||
#' | ||
#'@description | ||
#' \code{QUBIC} is a biclustering package, with source code upgrading from C code to C++ code. | ||
#' The updated source code can avoid memory allocation errors and is much more efficient than the original one. | ||
#' Based on our preliminary analysis, it can save 40\% running time on a plant microarray data. | ||
#' | ||
#'@details | ||
#' For a given representing matrix of a microarray data set, | ||
#' we construct a weighted graph G with genes represented as vertices, edges connecting every pair of genes, | ||
#' and the weight of each edge being the similarity level between the two corresponding (entire) rows. | ||
#' Clearly, the higher a weight, the more similar two corresponding rows are. | ||
#' Intuitively, genes in a bicluster should induce a heavier subgraph of G because under a subset of the conditions, | ||
#' these genes have highly similar expression patterns that should make the weight of each involved edge heavier, | ||
#' comparing to the edges in the background. | ||
#' But it should be noted that some heavy subgraph may not necessarily correspond to a bicluster, | ||
#' i.e. genes from a heavy subgraph may not necessarily have similar expression patterns | ||
#' because different edges in a subgraph may have heavier weights under completely different subsets of conditions. | ||
#' It should also be noted that recognizing all heavy subgraphs in a weighted graph itself is | ||
#' computationally intractable because identification of maximum cliques in a graph is a special case of this, | ||
#' and the maximum clique problem is a well known intractable problem (NP-hard). | ||
#' So in our solution, we do not directly solve the problem of finding heavy subgraphs in a graph. | ||
#' Instead, we built our biclustering algorithm based on this graph representation of a microarray gene expression data, | ||
#' and tackle the biclustering problem as follows. | ||
#' We find all feasible biclusters (I,J) in the given data set such that min\{|I|, |J|\} is as large as possible, | ||
#' where I and J are subsets of genes and conditions, respectively. | ||
#' | ||
#' @name QUBIC | ||
#' | ||
#' @aliases QUBIC qubic BCQU bcqu BCQU.d bcqu.d biclust method | ||
#' | ||
#' @param x the input data matrix, which could be the normalized gene expression matrix or its qualitative representation from Qdiscretization or other discretization ways. | ||
#' (for example: a qualitative representation of gene expression data) \cr | ||
#' For \code{BCQU()}, the data matrix should be real \cr | ||
#' For \code{BCQU.d()}, the data matrix should be discretized as integer. | ||
#' Zeros in the matrix will be treated as non-relevant value. | ||
#' @param r Affect the granularity of the biclusters. The range of possible ranks. | ||
#' A user can start with a small value of \code{r} | ||
#' (the default value is \code{1} so the corresponding data matrix consists of values '\code{1}', '\code{-1}' and '\code{0}'), | ||
#' evaluate the results, and then use larger values | ||
#' (should not be larger than half of the number of the columns) to look for fine structures within the identified biclusters. | ||
#' @param q Affect the granularity of the biclusters. The percentage of the regulating conditions for each gene. | ||
#' The choice of \code{q}'s value depends on the specific application goals; | ||
#' that is if the goal is to find genes that are responsive to local regulators, | ||
#' we should use a relatively small \emph{q}-value; otherwise we may want to consider larger \emph{q}-values. | ||
#' The default value of \code{q} is \code{0.06} in QUBIC | ||
#' (this value is selected based on the optimal biclustering results on simulated data). | ||
#' @param c The required consistency level of a bicluster. The default value of \code{c} is \code{0.95} | ||
#' @param o The number of output biclusters. \code{o}'s default value is \code{100}. | ||
#' @param f Control parameter, to control the level of overlaps between to-be-identified biclusters. | ||
#' The filter cut-off for data post-processing. For overlaps among to-be-identified biclusters. | ||
#' Its default value is set to \code{1} to ensure that no two reported biclusters overlap more than \code{f}. | ||
#' @param k The minimum column width of the block, minimum \code{2} columns. | ||
#' @param type The constraint type. \cr | ||
#' If \code{type} is omitted or \code{type="default"}, the original objective function in QUBIC will be used, which is to maximize the minimal value of numbers of rows and columns. | ||
#' If \code{type="area"}, the program tries to identify the bicluster with the maximal value of number of rows multiplied by number of columns. | ||
#' Other types are reserved for future use. | ||
#' @param P the flag to enlarge current bicluster using a \emph{p}-value constraint, | ||
#' which is defined based on its significance of expression consistency comparing to some simulated submatrix. Default: \code{FALSE}. | ||
#' @param C the flag to set the lower bound of the condition number in a bicluster as 5\% of the total condition number in the input data. | ||
#' Only suggested to use when the input data has a few conditions (e.g. less than \code{20}). Default: \code{FALSE}. | ||
#' @param verbose If '\code{TRUE}', prints extra information on progress. | ||
#' @return Returns a Biclust object, which contains bicluster candidates | ||
#' | ||
#' @seealso \code{\link{qudiscretize}} \code{\link{qunetwork}} \code{\link{qunet2xml}} \code{\link{biclust}} | ||
#' | ||
#' @references Li G, Ma Q, Tang H, Paterson AH, Xu Y. | ||
#' QUBIC: a qualitative biclustering algorithm for analyses of gene expression data. | ||
#' \emph{Nucleic Acids Research}. 2009;\bold{37(15)}:e101. doi:10.1093/nar/gkp491. | ||
#' @references Zhou F, Ma Q, Li G, Xu Y. | ||
#' QServer: A Biclustering Server for Prediction and Assessment of Co-Expressed Gene Clusters. | ||
#' \emph{PLoS ONE}. 2012;\bold{7(3)}:e32660. doi: 10.1371/journal.pone.0032660 | ||
#' | ||
#' @keywords qubic biclust bicluster bi-cluster biclustering bi-clustering | ||
NULL | ||
|
||
#' \code{BCQU} performs a QUalitative BIClustering. | ||
#' | ||
#' @name BCQU | ||
#' | ||
#' @rdname QUBIC | ||
#' | ||
#' @examples | ||
#' # Random matrix with embedded bicluster | ||
#' test <- matrix(rnorm(5000),100,50) | ||
#' test[11:20,11:20] <- rnorm(100,3,0.3) | ||
#' res<-biclust(test, method = BCQU()) | ||
#' summary(res) | ||
#' show(res) | ||
#' names(attributes(res)) | ||
#' | ||
#' \dontrun{ | ||
#' # Load microarray matrix | ||
#' data(BicatYeast) | ||
#' | ||
#' # Display number of column and row of BicatYeast | ||
#' ncol(BicatYeast) | ||
#' nrow(BicatYeast) | ||
#' #Bicluster on microarray matrix | ||
#' system.time(res<-biclust(BicatYeast, method=BCQU())) | ||
#' | ||
#' # Show bicluster info | ||
#' res | ||
#' # Show the first bicluster | ||
#' bicluster(BicatYeast, res, 1) | ||
#' # Get the 4th bicluster | ||
#' bic4 <- bicluster(BicatYeast, res, 4)[[1]] | ||
#' | ||
#' # or | ||
#' bic4 <- bicluster(BicatYeast, res)[[4]] | ||
#' # Show rownames of the 4th bicluster | ||
#' rownames(bic4) | ||
#' # Show colnames of the 4th bicluster | ||
#' colnames(bic4) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Bicluster on selected genes | ||
#' data(EisenYeast) | ||
#' genes <- c("YHR051W", "YKL181W", "YHR124W", "YHL020C", "YGR072W", | ||
#' "YGR145W", "YGR218W", "YGL041C", "YOR202W", "YCR005C") | ||
#' # same result as res<-biclust(EisenYeast[1:10,], method=BCQU()) | ||
#' res<-biclust(EisenYeast[genes,], method=BCQU()) | ||
#' res | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Get bicluster by row name = 249364_at | ||
#' bicluster(BicatYeast, res, which(res@@RowxNumber[which(rownames(BicatYeast)=="249364_at"),])) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Get bicluster by col name = cold_roots_6h | ||
#' bicluster(BicatYeast, res, which(res@@NumberxCol[,which(colnames(BicatYeast)=="cold_roots_6h")])) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # | ||
#' bicluster(BicatYeast, res, which(res@@NumberxCol[,which(colnames(BicatYeast)=="cold_roots_6h")])) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Draw a single bicluster using drawHeatmap {biclust} | ||
#' data(BicatYeast) | ||
#' res <- biclust(BicatYeast, BCQU(), verbose = FALSE) | ||
#' # Draw heatmap of the first cluster | ||
#' drawHeatmap(BicatYeast, res, 1) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Draw a single bicluster using heatmap {stats} | ||
#' data(BicatYeast) | ||
#' res <- biclust(BicatYeast, BCQU(), verbose = FALSE) | ||
#' bic10 <- bicluster(BicatYeast, res, 10)[[1]] | ||
#' | ||
#' # Draw heatmap of the 10th cluster using heatmap {stats} | ||
#' heatmap(as.matrix(t(bic10)), Rowv = NA, Colv = NA, scale = "none") | ||
#' | ||
#' # Draw heatmap of the 10th cluster using plot_heatmap {phyloseq} | ||
#' stopifnot(require("phyloseq")) | ||
#' plot_heatmap(otu_table(bic10, taxa_are_rows = TRUE)) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Draw a single bicluster with original data background and color options | ||
#' data(BicatYeast) | ||
#' res <- biclust(BicatYeast, BCQU(), verbose = FALSE) | ||
#' palette <- colorRampPalette(c("red", "yellow", "green"))(n = 100) | ||
#' # Draw heatmap of the first cluster with color | ||
#' drawHeatmap(BicatYeast, res, 1, FALSE, beamercolor = TRUE, paleta = palette) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Draw some overlapped biclusters | ||
#' data(BicatYeast) | ||
#' res <- biclust(BicatYeast, BCQU(), verbose = FALSE) | ||
#' biclusternumber(res, 1) | ||
#' biclusternumber(res, 3) | ||
#' # Draw overlapping heatmap | ||
#' heatmapBC(x = BicatYeast, bicResult = res, number = c(1, 3), local = TRUE) | ||
#' | ||
#' } | ||
#' \dontrun{ | ||
#' # Draw all the biclusters | ||
#' data(BicatYeast) | ||
#' res <- biclust(BicatYeast, BCQU(), verbose = FALSE) | ||
#' # Draw the first bicluster on heatmap | ||
#' heatmapBC(x = BicatYeast, bicResult = res, number = 1) | ||
#' # Draw all the biclusters, not working well. | ||
#' # Overlap plotting only works for two neighbor bicluster defined by the order in the number slot. | ||
#' heatmapBC(x = BicatYeast, bicResult = res, number = 0) | ||
#' | ||
#' } | ||
setClass(
  "BCQU",
  contains = "BiclustMethod",
  prototype = prototype(
    # Forward the data matrix and any extra arguments to .qubiclust().
    biclustFunction = function(x, ...) .qubiclust(x, ...)
  )
)
|
||
#' @describeIn QUBIC Performs a QUalitative BIClustering. | ||
#' @usage ## S4 method for class 'matrix,BCQU': | ||
#' biclust(x, method = BCQU(), r = 1, q = 0.06, c = 0.95, o = 100, f = 1, k = 2, | ||
#' type = "default", P = FALSE, C = FALSE, verbose = TRUE) | ||
BCQU <- function() {
  # Build and hand back a fresh instance of the 'BCQU' method class.
  new("BCQU")
}
|
||
#' QUBICD | ||
#' | ||
#' \code{BCQU.d} performs a QUalitative BIClustering for a discrete matrix. | ||
#' | ||
#' @name BCQU.d-class | ||
#' | ||
#' @aliases qubic_d QUBICD QUD BCQU.d-class biclust,matrix,BCQU.d-method | ||
#' | ||
#' @rdname QUBIC | ||
#' | ||
#' @examples | ||
#' # Biclustering of discretized yeast microarray data | ||
#' data(BicatYeast) | ||
#' disc<-qudiscretize(BicatYeast[1:10,1:10]) | ||
#' biclust(disc, method=BCQU.d()) | ||
setClass(
  "BCQU.d",
  contains = "BiclustMethod",
  prototype = prototype(
    # Forward the data matrix and any extra arguments to .qubiclust_d().
    biclustFunction = function(x, ...) .qubiclust_d(x, ...)
  )
)
|
||
#' @describeIn QUBIC Performs a QUalitative BIClustering for a discrete matrix. | ||
#' | ||
#' @usage ## S4 method for class 'matrix,BCQU.d': | ||
#' biclust(x, method = BCQU.d(), c = 0.95, o = 100, f = 1, k = 2, | ||
#' type = "default", P = FALSE, C = FALSE, verbose = TRUE) | ||
BCQU.d <- function() {
  # Build and hand back a fresh instance of the 'BCQU.d' method class.
  new("BCQU.d")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Internal driver behind the 'BCQU' method class: runs qubic() on `x` and
# repackages its output with BiclustResult().
#
# `type` is reduced to the flag passed as qubic()'s `S` argument, which is
# TRUE only for type == "area"; all other arguments are forwarded unchanged.
.qubiclust <- function(x, r = 1, q = 0.06,
                       c = 0.95, o = 100, f = 1, k = 2,
                       type = "default", P = FALSE, C = FALSE,
                       verbose = TRUE) {
  # Capture the call first so it can be stored alongside the result.
  invocation <- match.call()
  use_area <- (type == "area")

  fit <- qubic(x, r, q, c, o, f, k, P, use_area, C, verbose)

  # `fit` is indexed by name with single brackets (presumably a named list
  # returned by the compiled routine -- confirm against src/rcpp_qubic.cpp).
  n_found <- as.numeric(fit["Number"])
  row_by_bicluster <- matrix(unlist(fit["RowxNumber"]),
                             ncol = n_found, byrow = FALSE)
  bicluster_by_col <- matrix(unlist(fit["NumberxCol"]),
                             nrow = n_found, byrow = FALSE)

  BiclustResult(
    as.list(invocation),
    row_by_bicluster,
    bicluster_by_col,
    n_found,
    fit["info"]
  )
}
# Internal driver behind the 'BCQU.d' method class: runs qubic_d() on `x` and
# repackages its output with BiclustResult().
#
# `type` is reduced to the flag passed as qubic_d()'s `S` argument, which is
# TRUE only for type == "area"; all other arguments are forwarded unchanged.
.qubiclust_d <- function(x,
                         c = 0.95, o = 100, f = 1, k = 2,
                         type = "default", P = FALSE, C = FALSE,
                         verbose = TRUE) {
  # Capture the call first so it can be stored alongside the result.
  invocation <- match.call()
  use_area <- (type == "area")

  fit <- qubic_d(x, c, o, f, k, P, use_area, C, verbose)

  # `fit` is indexed by name with single brackets (presumably a named list
  # returned by the compiled routine -- confirm against src/rcpp_qubic.cpp).
  n_found <- as.numeric(fit["Number"])
  row_by_bicluster <- matrix(unlist(fit["RowxNumber"]),
                             ncol = n_found, byrow = FALSE)
  bicluster_by_col <- matrix(unlist(fit["NumberxCol"]),
                             nrow = n_found, byrow = FALSE)

  BiclustResult(
    as.list(invocation),
    row_by_bicluster,
    bicluster_by_col,
    n_found,
    fit["info"]
  )
}
Oops, something went wrong.