Merge branch 'develop' into scripts

FredHutch · Mar 24, 2016 · c48e219 · c48e219
2 parents 35053e5 + 5a88cc8
commit c48e219
Show file tree

Hide file tree

Showing 187 changed files with 22,244 additions and 3,521 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,17 @@
+# Set the default behavior, in case people don't have core.autocrlf set
+
+* text=auto
+
+# Text files that must have LF on checkout
+
+*.sh  text eol=lf
+*.R   text eol=lf
+*.html   text eol=lf
+
+# Files that are truly binary
+
+*.RData binary
+*.pdf   binary
+*.png   binary
+*.gif   binary
+*.jpg   binary
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -12,6 +12,11 @@ If you haven't already, start by signing up for a [GitHub account](https://githu
 
 ## Getting Started
 
+First review this simple flowchart to see the best place for you to start working and contributing to Oncoscape! 
+
+![oncoscape_user_flowchart_final](https://cloud.githubusercontent.com/assets/15098135/12570250/d3c0ab8a-c38a-11e5-85f5-61ff83c27cfe.png)
+
+
 You can clone this repository locally from GitHub using the "Clone in Desktop" 
 button from the main project site, or run this command in the Git Shell:
 

diff --git a/DOCKER.md b/DOCKER.md
@@ -76,7 +76,7 @@ If your output looks like the above, then you're Mac is ready to use Docker.
 
 ##3. Installing Docker on Windows
 
-Running a Docker environment on Microsoft Windows requires Windows 7, 8.x (Windows 10 is currently not supported).
+Running a Docker environment on Microsoft Windows requires Windows 7, 8.x, or 10.
 
 To get Docker running on Windows, follow the instructions below:
 
@@ -348,3 +348,4 @@ docker rmi oncodev
 ```
 
 You can see which containers are on your workstation with the "docker ps -a" command and which images are on your workstation with the "docker images" command.
+
diff --git a/Dockerfile b/Dockerfile
@@ -10,7 +10,8 @@ RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 51716619E084DAB9
 
 # Update the system and install packages
 RUN apt-get -y -qq update && apt-get -y -qq install \
-	r-base=3.2.3* \
+	r-base=3.2.2* \
+	r-recommended=3.2.2-1trusty0* \
 	vim \
 	make \
 	m4 \

diff --git a/Oncoscape/DESCRIPTION b/Oncoscape/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: OncoDev14
 Type: Package
 Title: Oncoscape:  Coordinated Exploration of Clinical and Molecular Cancer Data
-Version: 1.4.93
-Date: 2015-12-10
+Version: 1.4.94
+Date: 2016-03-10
 Author: Paul Shannon
 Maintainer:  <pshannon@fhcrc.org>
 Depends: R (>= 3.2.0), jsonlite, httpuv, SttrDataPackage, survival, base64enc, PCA, PLSR
@@ -20,7 +20,7 @@ Collate: OncoDev14.R
          wsGeneral.R
          wsDatasets.R
          wsSurvival.R
-	      wsOncoprint.R
+	 wsOncoprint.R
          oncoprint.R
          survival.R
          wsPLSR.R

diff --git a/Oncoscape/R/oncoprint.R b/Oncoscape/R/oncoprint.R
@@ -1,21 +1,75 @@
-create.oncoprint.input <- function(string, ds)
+
+# Pick a random contiguous run of genes and a random contiguous run of patients.
+#
+#   numberReceived: numeric scalar, the total number of items (genes + patients) wanted
+#   genes_all:      vector of candidate gene names
+#   patients_all:   vector of candidate patient ids
+#
+# Returns list(genes=..., patients=...).
+#   - the gene count is drawn from 1..40 when numberReceived > 40, otherwise
+#     from 1..(numberReceived-1), and is never larger than length(genes_all)
+#   - the remaining items are patients, capped at length(patients_all)
+#   - both windows stay inside their vectors, so no NA entries are produced
+#   - when no gene can be drawn (numberReceived < 2, or genes_all is empty)
+#     both elements are returned empty
+random.samples.genes.oncoprint <- function(numberReceived, genes_all, patients_all)
+{
+    printf("*****receive number")
+
+    maxGenes <- if(numberReceived > 40) 40L else as.integer(numberReceived - 1)
+    maxGenes <- min(maxGenes, length(genes_all))
+    if(maxGenes < 1L){
+           # nothing sensible to sample: 1:0 would count down and could yield a gene count of 0
+        return(list(genes=genes_all[0], patients=patients_all[0]))
+    }
+
+    geneLength <- sample.int(maxGenes, 1)
+    printf("*****geneLength is : %d\n", geneLength)
+
+       # never ask for more patients than exist; an over-long window would index past the end (NA ids)
+    patientLength <- min(as.integer(numberReceived) - geneLength, length(patients_all))
+    printf("*****patientLength is : %d\n", patientLength)
+
+       # a window of k items in a vector of n items can start at 1..(n-k+1)
+    geneLowerBound <- sample.int(length(genes_all) - geneLength + 1L, 1)
+    printf("*****geneLowerBound is : %d\n", geneLowerBound)
+    patientLowerBound <- sample.int(length(patients_all) - patientLength + 1L, 1)
+    printf("*****patientLowerBound is : %d\n", patientLowerBound)
+
+       # seq.int(..., length.out=0) is empty, so a zero-length window selects nothing
+    genes <- genes_all[seq.int(geneLowerBound, length.out=geneLength)]
+    printf("*****length of genes is: %d\n", length(genes))
+    patients <- patients_all[seq.int(patientLowerBound, length.out=patientLength)]
+    printf("*****length of patients is: %d\n", length(patients))
+
+    list(genes=genes, patients=patients)
+} # random.samples.genes.oncoprint
+#-------------------------------------------------------------------------------
+create.oncoprint.input <- function(samplesAndGenes, ds)
 {
+    printf(" ======= entering create.oncoprint.input")
+
+    cmd <- sprintf("ds <- datasets[['%s']]", ds)
+    eval(parse(text=cmd))
+    #}else{
+    #    printf("***** datasets doesn't exits, create ds object")
+    #    printf("***** ds is a samplesAndGenes %s ", ds)
+    #    eval(parse(text=sprintf("ds <- %s", ds)))    
+    #    printf("***** ds structure %s", str(ds, max.level=2))
+    #}
+
     ds.matrices = SttrDataPackage:::matrices(ds)
     cnv <- ds.matrices$mtx.cn
     mut <- ds.matrices$mtx.mut
     if("mtx.mrna" %in% names(ds.matrices)){
         mrna <- ds.matrices$mtx.mrna
     }else{      mrna <- ds.matrices$mtx.mrna.bc }
+
     genes_all = unique(union(union(colnames(cnv),colnames(mut)),colnames(mrna)))
     patients_all = unique(union(union(rownames(cnv),rownames(mut)),rownames(mrna)))
+
+    patients = c();
+    genes = c();
+
 
-    if(any(string %in% genes_all) && any(string %in% substring(patients_all,1,12))){
-        patient_core_Ids <- string[string %in% substring(patients_all,1,12)]
-        patients <- patients_all[match(patient_core_Ids,substring(patients_all,1,12))]#locate back to the original patient IDs
-        genes <- string[string %in% genes_all]
-
+    if(is.numeric(samplesAndGenes)){
+        processed_message <- random.samples.genes.oncoprint(samplesAndGenes, genes_all, patients_all)
+        patients <- processed_message$patients
+        genes <- processed_message$genes
+    }else if(any(samplesAndGenes %in% genes_all) && any(samplesAndGenes %in% substring(patients_all,1,12))) {
+        patient_core_Ids <- samplesAndGenes[samplesAndGenes %in% substring(patients_all,1,12)]
+        patients <- patients_all[match(patient_core_Ids, substring(patients_all,1,12))]#locate back to the original patient IDs
+        genes <- samplesAndGenes[samplesAndGenes %in% genes_all]
+        printf("*****original samplesAndGenes and patients and genes processing block")
+    }
+
+    if(length(patients) == 0 || length(genes) == 0){
+        res = "It seems you only selected either patients or genes, please re-select to include both information"
+        printf("=== only genes or patients are selected, status failed\n")
+        return <- list(status="error", payload=toJSON(res))
+    }else{
         printf("=== entering into data processing")
-
         if(!is.null(cnv)){
             patients_cnv <- intersect(patients, rownames(cnv))
             genes_cnv <- intersect(genes, colnames(cnv))
@@ -41,10 +95,10 @@ create.oncoprint.input <- function(string, ds)
                 cnv_res_flattened[,3] <- gsub(-2,"HOMODELETED",cnv_res_flattened[,3])
                 cnv_res_flattened[,3] <- gsub(2,"AMPLIFIED",cnv_res_flattened[,3])
                 cnv_res_flattened[,3] <- gsub(1,"GAINED",cnv_res_flattened[,3])
-                if(dim(cnv_res_flattened)[1] == 0 ) rm(cnv_res_flattened)
+                #if(dim(cnv_res_flattened)[1] == 0 ) rm(cnv_res_flattened)
                 }
-        }
-        
+         }
+
         if(!is.null(mrna)){
             patients_mrna <- intersect(patients, rownames(mrna))
             genes_mrna <- intersect(genes, colnames(mrna))
@@ -71,10 +125,10 @@ create.oncoprint.input <- function(string, ds)
                 }else if(length(which(mrna_res_flattened$value < -2)) > 0){
                     mrna_res_flattened$value[which(mrna_res_flattened$value < -2)] <- "DOWNREGULATED"
                 }
-                if(dim(mrna_res_flattened)[1] == 0 ) rm(mrna_res_flattened)
+                #if(dim(mrna_res_flattened)[1] == 0 ) rm(mrna_res_flattened)
             }
         }
-        
+
         if(!is.null(mut)){
             patients_mut <- intersect(patients, rownames(mut))
             genes_mut <- intersect(genes, colnames(mut))
@@ -100,11 +154,10 @@ create.oncoprint.input <- function(string, ds)
                 #mut_res_flattened$value <- gsub("",NA,mut_res_flattened$value)
                 mut_res_flattened <- mut_res_flattened[which(mut_res_flattened$value != ""),]
                 mut_res_flattened$value <- rep("MISSENSE",nrow(mut_res_flattened)) #need to update with more features, such as truncated etc.
-                if(dim(mut_res_flattened)[1] == 0 ) rm(mut_res_flattened)
+                #if(dim(mut_res_flattened)[1] == 0 ) rm(mut_res_flattened)
             }
         }
-
-
+
         if(exists("cnv_res_flattened") & exists("mrna_res_flattened")){
             cnv_mrna_res_flattened <- merge(cnv_res_flattened, mrna_res_flattened,c('sample','gene'),all.x=T,all.y=T)
             if(exists("mut_res_flattened")){
@@ -130,23 +183,24 @@ create.oncoprint.input <- function(string, ds)
             res_flattened <- mut_res_flattened
             colnames(res_flattened) <- c("patient","gene","mut_type")
         }
-     
+
 
-
+        printf("=== res_flattened status:%d\n", exists("res_flattened"));
         if(exists("res_flattened")){
             r <- jsonlite:::toJSON(res_flattened, pretty = TRUE)
             #res = list(r,genes)
             res = list(r,genes)
             printf("=== printing result json file\n")
+            printf("=== dimension of res_flattened:%d, %d\n", dim(res_flattened)[1], dim(res_flattened)[2])
             return <- list(status="success", payload=toJSON(res))
         }else{
             res = "No overlapping patients or genes within dataset, please re-select"
-            printf("=== printing result json file, result is a string\n")
+            printf("=== printing result json file, result is a samplesAndGenes\n")
             return <- list(status="error", payload=toJSON(res))
         }
-    }else{
-        res = "It seems you only selected either patients or genes, please re-select to include both information"
-        printf("=== only genes or patients are selected, status failed\n")
-        return <- list(status="error", payload=toJSON(res))
+
     }
-}
+
+
+} # create.oncoprint.input
+#-------------------------------------------------------------------------------
diff --git a/Oncoscape/R/wsDatasets.R b/Oncoscape/R/wsDatasets.R
@@ -7,6 +7,8 @@ addRMessageHandler("getPatientHistoryDxAndSurvivalMinMax", "getPatientHistoryDxA
 addRMessageHandler("getSampleDataFrame", "getSampleDataFrame")
 addRMessageHandler("getGeneSetNames",    "wsGetGeneSetNames")
 addRMessageHandler("getGeneSetGenes",    "wsGetGeneSetGenes")
+addRMessageHandler("getExpressionDataSetNames",    "wsGetExpressionDataSetNames")
+addRMessageHandler("getExpressionDataSetExpression",    "wsGetExpressionDataSetExpression")
 addRMessageHandler("getSampleCategorizationNames", "wsGetSampleCategorizationNames")
 addRMessageHandler("getSampleCategorization",      "wsGetSampleCategorization")
 addRMessageHandler("getMarkersNetwork", "getMarkersAndSamplesNetwork")
@@ -288,6 +290,50 @@ wsGetGeneSetGenes <- function(ws, msg)
 
 } # wsGetGeneSetGenes
 #----------------------------------------------------------------------------------------------------
+# Websocket handler: reply with the manifest rows of the current dataset's
+# expression data sets.
+#
+#   ws:  websocket object; the JSON reply is written with ws$send
+#   msg: incoming message; msg$callback becomes the reply's cmd
+#
+# The reply payload holds datasetName, the (friendlier) manifest column titles,
+# the selected manifest row names, and the selected rows as an unnamed-column matrix.
+wsGetExpressionDataSetNames <- function(ws, msg)
+{
+  datasetName <- state[["currentDatasetName"]]
+  dataset <- datasets[[datasetName]]
+
+  expressionDataSetNames <- SttrDataPackage:::getExpressionDataSetNames(dataset)
+    # collapse first: printf presumably wraps sprintf, which would repeat the
+    # whole message once per element when handed a vector for "%s"
+  printf("***** expressionDataSetNames are %s ", paste(expressionDataSetNames, collapse=", "))
+  tbl <- manifest(dataset)
+
+    # make some column names more friendly; fixed=TRUE throughout so that "."
+    # is a literal dot rather than a regex wildcard
+  column.titles <- colnames(tbl)
+  column.titles <- sub("entity.count", "rows", column.titles, fixed=TRUE)
+  column.titles <- sub("feature.count", "cols", column.titles, fixed=TRUE)
+  column.titles <- sub("entity.", "row ", column.titles, fixed=TRUE)
+  column.titles <- sub("feature.", "column ", column.titles, fixed=TRUE)
+
+    # keep only the manifest rows that describe expression data sets
+  tbl <- tbl[expressionDataSetNames,]
+  mtx <- as.matrix(tbl)
+  colnames(mtx) <- NULL     # titles travel separately in the payload
+  payload <- list(datasetName=datasetName, colnames=column.titles, rownames=rownames(tbl), mtx=mtx)
+
+  return.msg <- list(cmd=msg$callback, status="success", callback="", payload=payload)
+
+  ws$send(toJSON(return.msg))
+
+} # wsGetExpressionDataSetNames
+#----------------------------------------------------------------------------------------------------
+# Websocket handler: send back the expression data for the data set named in
+# msg$payload, taken from the dataset currently selected in `state`.
+#
+#   ws:  websocket object; the JSON reply is written with ws$send
+#   msg: incoming message; msg$payload names the expression data set and
+#        msg$callback becomes the reply's cmd
+#
+# Stops (stopifnot) when the requested name is not one of the dataset's
+# expression data set names.
+# NOTE(review): getExpressionDataSetNames is called unqualified here -- confirm
+# it is visible from this package's namespace.
+wsGetExpressionDataSetExpression <- function(ws, msg)
+{
+  requested <- msg$payload
+  current <- datasets[[state[["currentDatasetName"]]]]
+  stopifnot(requested %in% getExpressionDataSetNames(current))
+
+  reply <- list(cmd=msg$callback, status="success", callback="",
+                payload=getExpressionDataSetExpression(current, requested))
+  ws$send(toJSON(reply))
+
+} # wsGetExpressionDataSetExpression
+#----------------------------------------------------------------------------------------------------
 wsGetSampleCategorizationNames <- function(ws, msg)
 {
   datasetName <- state[["currentDatasetName"]]

diff --git a/Oncoscape/R/wsOncoprint.R b/Oncoscape/R/wsOncoprint.R
@@ -2,22 +2,27 @@
 #                   -------------------       ----------------                -------------
 addRMessageHandler("oncoprint_data_selection",     "oncoprint_data_selection")            # displayOncoprint
 #----------------------------------------------------------------------------------------------------
-library(SttrDataPackage)
-library(TCGAgbm)
 oncoprint_data_selection <- function(ws, msg)
 {
     printf("=== entering oncoprint_data_selection")
 
     currentDataSetName <- state[["currentDatasetName"]]
-    ds <- state[[currentDataSetName]];
-
-
+    printf("***** currentDatasetName: %s", currentDataSetName)
+    if(!is.null(msg$payload$ds)){
+        ds <- msg$payload$ds
+    }else{
+        ds <- currentDataSetName
+    }
     printf("=== after obtaining datasets from datapackage constructor, next is processing received ws msg")
     payload_str <- msg$payload$sampleIDs
-    partial_msg <- create.oncoprint.input(payload_str, ds) 
+    #payload_mode <- msg$payload$testing
+
+    #partial_msg <- create.oncoprint.input(payload_str, ds, payload_mode) 
+    partial_msg <- create.oncoprint.input(payload_str, ds)
     return.msg <-toJSON(c(cmd=msg$callback, partial_msg))
     printf("=== before sending out result")
     ws$send(return.msg)
 
 } # data_selection
 #-------------------------------------------------------------------------------
+
diff --git a/Oncoscape/R/wsPCA.R b/Oncoscape/R/wsPCA.R
@@ -3,6 +3,7 @@
 addRMessageHandler("echo", "ws.pcaEchoHandler")
 addRMessageHandler("createPCA", "ws.createPCA")
 addRMessageHandler("calculatePCA", "ws.calculatePCA")
+addRMessageHandler("requestDataTableMeta", "ws.requestDataTableMeta")
 #----------------------------------------------------------------------------------------------------
 ws.pcaEchoHandler <- function(ws, msg)
 {
@@ -33,7 +34,7 @@ ws.createPCA <- function(ws, msg)
    eval(parse(text=cmd))
    state[["mypca"]] <- mypca
    printf("ws.createPCA just executed '%s'", cmd)
-   printf("resulting mypca object:");
+   printf("resulting mypca object:")
    print(pcaDataSummary(mypca))
 
    response <- pcaDataSummary(mypca)
@@ -71,23 +72,29 @@ ws.calculatePCA <- function(ws, msg)
    samples <- NA
    if("samples" %in% names(msg$payload))
       samples <- msg$payload$samples;
+
+   currentDataSetName <- state[["currentDatasetName"]]
+   ds <- datasets[[currentDataSetName]]
+   matrixName = msg$payload$expressionDataSet
+   cmd <- sprintf("mypca <- PCA(ds, '%s')", matrixName)
+   printf("*****cmd is: %s", cmd)
+   eval(parse(text=cmd))
+   state[["mypca"]] <- mypca
 
 
-   mypca <- state[["mypca"]]
-
    x <- calculate(mypca, genes, samples)
      # fashion a 3-column data.frame nicely suited to use with d3: gene, PC1, PC2
      # add two more scalar field: pc1.varianceAccountedFor, pc2.varianceAccounted for
 
-   mtx.loadings <- as.matrix(x$scores[, 1:2])
-   ids = x$sampleIDs;
+   mtx.scores <- as.matrix(x$scores[, 1:2])
+   ids <- x$sampleIDs
    max.value <- max(abs(c(x$scores[,1], x$scores[,2])))
    importance.PC1 = x$importance["Proportion of Variance", "PC1"]
    importance.PC2 = x$importance["Proportion of Variance", "PC2"]
 
-   payload <- list(scores=mtx.loadings, ids=ids, maxValue=max.value,
+   payload <- list(scores=mtx.scores, ids=ids, maxValue=max.value,
                    importance.PC1=importance.PC1,
-                   importance.PC2=importance.PC2)
+                   importance.PC2=importance.PC2, geneSetName=genes)
 
 
    json <- jsonlite::toJSON(list(cmd=msg$callback, callback="", status="success", payload=payload),