Skip to content

Commit

Permalink
Adding Anthony's updates from PR 4 - revert was messy
Browse files (browse the repository at this point in the history)
  • Loading branch information
bahill committed Feb 8, 2021
1 parent a457b46 commit bfd11c5
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 31 deletions.
Binary file modified GEOImporter.doc
Binary file not shown.
67 changes: 40 additions & 27 deletions manifest
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#Tue Nov 24 19:37:11 UTC 2020
author=Joshua Gould, Broad Institute
commandLine=<Rscript> <libdir>main.R <libdir> -d<data.column.name> -o<output.filename> -a<GEO.accession> -f<GEO.SOFT.file> -1<ftp.proxy.server> -2<ftp.proxy.username> -3<ftp.proxy.password> -4<http.proxy.server> -5<http.proxy.username> -6<http.proxy.password>
commandLine=<Rscript> <libdir>main.R <libdir> -n<Drop.NA.Rows> -d<data.column.name> -o<output.filename> -a<GEO.accession> -f<GEO.SOFT.file> -1<ftp.proxy.server> -2<ftp.proxy.username> -3<ftp.proxy.password> -4<http.proxy.server> -5<http.proxy.username> -6<http.proxy.password>
cpuType=any
description=Imports data from the Gene Expression Omnibus (GEO)
fileFormat=gct
Expand All @@ -27,62 +27,75 @@ p2_prefix_when_specified=
p2_TYPE=FILE
p2_type=java.io.File
p2_value=
p3_default_value=VALUE
p3_description=The name of the column that contains the expression values
p3_name=data.column.name
p3_optional=on
p3_MODE=
p3_TYPE=TEXT
p3_default_value=FALSE
p3_description=Omit rows containing all "NA" values from GSExxx data series.
p3_fileFormat=
p3_flag=-n
p3_name=Drop.NA.Rows
p3_numValues=1..1
p3_optional=
p3_prefix=
p3_prefix_when_specified=
p3_type=java.lang.String
p3_value=
p4_default_value=<GEO.accession><GEO.SOFT.file_basename>
p4_description=The name of the output file
p4_name=output.filename
p4_optional=
p3_value=FALSE\=false;TRUE\=true
p4_default_value=VALUE
p4_description=The name of the column that contains the expression values
p4_name=data.column.name
p4_optional=on
p4_prefix_when_specified=
p4_type=java.lang.String
p4_value=
p5_default_value=
p5_description=FTP Proxy server
p5_name=ftp.proxy.server
p5_optional=on
p5_default_value=<GEO.accession><GEO.SOFT.file_basename>
p5_description=The name of the output file
p5_name=output.filename
p5_optional=
p5_prefix_when_specified=
p5_type=java.lang.String
p5_value=
p6_default_value=
p6_description=FTP Proxy username
p6_name=ftp.proxy.username
p6_description=FTP Proxy server
p6_name=ftp.proxy.server
p6_optional=on
p6_prefix_when_specified=
p6_type=java.lang.String
p6_value=
p7_default_value=
p7_description=FTP Proxy password
p7_name=ftp.proxy.password
p7_description=FTP Proxy username
p7_name=ftp.proxy.username
p7_optional=on
p7_prefix_when_specified=
p7_type=PASSWORD
p7_type=java.lang.String
p7_value=
p8_default_value=
p8_description=HTTP Proxy server
p8_name=http.proxy.server
p8_description=FTP Proxy password
p8_name=ftp.proxy.password
p8_optional=on
p8_prefix_when_specified=
p8_type=java.lang.String
p8_type=PASSWORD
p8_value=
p9_default_value=
p9_description=HTTP Proxy username
p9_name=http.proxy.username
p9_description=HTTP Proxy server
p9_name=http.proxy.server
p9_optional=on
p9_prefix_when_specified=
p9_type=java.lang.String
p9_value=
p10_default_value=
p10_description=HTTP Proxy password
p10_name=http.proxy.password
p10_description=HTTP Proxy username
p10_name=http.proxy.username
p10_optional=on
p10_prefix_when_specified=
p10_type=PASSWORD
p10_type=java.lang.String
p10_value=
p11_default_value=
p11_description=HTTP Proxy password
p11_name=http.proxy.password
p11_optional=on
p11_prefix_when_specified=
p11_type=PASSWORD
p11_value=

privacy=public
publicationDate=11/01/2007 20\:31
Expand Down
18 changes: 14 additions & 4 deletions src/main.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ message <- function (..., domain = NULL, appendLF = TRUE) {

# GSExxx, Series
GseToGct <- function(gse=NULL, data.column.name='VALUE',
gct.output.filename=NULL) {
gct.output.filename=NULL, omit.na=FALSE) {
t <- Table(GPLList(gse)[[1]])
ids <- t$ID # ids in GPL

Expand All @@ -24,7 +24,14 @@ GseToGct <- function(gse=NULL, data.column.name='VALUE',
mymatch <- match(ids, tab$ID_REF)
return(tab[,data.column.name][mymatch])
}))


if(omit.na==TRUE) {
filter <- which(rowSums(is.na(data.matrix)) != ncol(data.matrix))
data.matrix <- data.matrix[filter,]
desc <- desc[filter]
ids<- ids[filter]
}

row.names(data.matrix) <- ids
gct <- list(data=data.matrix, row.descriptions=desc)
write.gct(gct, gct.output.filename)
Expand All @@ -33,7 +40,8 @@ GseToGct <- function(gse=NULL, data.column.name='VALUE',


# GDSxxx, e.g. GDS1, GDS2577
GdsToGct <- function(gds=NULL, gct.output.filename) {
GdsToGct <- function(gds=NULL, gct.output.filename,
omit.na=FALSE) {
eset <- GDS2eSet(gds, do.log2 = FALSE)
f <- eset@featureData
annotations <- row.names(varMetadata(f))
Expand Down Expand Up @@ -111,6 +119,8 @@ run <- function(libdir, args) {
geo.id <- value
} else if(flag=='-f') {
filename <- value
} else if(flag=='-n') {
omit.na <- as.logical(value)
} else if(flag=='-d') {
data.column.name <- value
} else if(flag=='-o') {
Expand Down Expand Up @@ -170,7 +180,7 @@ run <- function(libdir, args) {
if (class(geo.query) == "GSE") {
info("converting GSE...")
GseToGct(gse=geo.query, data.column.name=data.column.name,
gct.output.filename=gct.output.filename)
gct.output.filename=gct.output.filename, omit.na=omit.na)
} else if (class(geo.query) == "GDS") {
info("converting GDS...")
GdsToGct(gds=geo.query, gct.output.filename=gct.output.filename)
Expand Down

0 comments on commit bfd11c5

Please sign in to comment.