################################################################################ ## Project PuMaQC - pipeline ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## This is the main controlling script ## ## (c)GNU GPL P.V.Nazarov, J.P.Corte-Real ################################################################################ #ini.file = "f:/Data/PuMaQC/testStudy1/project.ini" #src.path = "n:/Develop/R/PipeLines/PuMaQC.b" PuMaQC = function(ini.file="",src.path = "http://sablab.net/PuMaQC") { source(file.path(src.path,"PuMaQC.Search.r")) source(file.path(src.path,"PuMaQC.Download.r")) source(file.path(src.path,"PuMaQC.Preprocessing.r")) source(file.path(src.path,"PuMaQC.QC.r")) source("http://sablab.net/scripts/parseINI.r") st.title = c("\n ##################################################\n", "## P u M a Q C ##\n", "##-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=##\n", "## PuMAQC - Public Microarray Quality Control ##\n", "## tool provides a simple pipeline for querying ##\n", "## and downloading Affymetrix data from GEO, ##\n", "## followed by preprocessing, quality control ##\n", "## and report generation. ##\n", "##-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=##\n", "## (c)GNU GPL P.V.Nazarov, J.P.Corte-Real, 2011 ##\n", "##################################################\n") cat(st.title) ## Check whether INI file exists and validate its structure while (T){ cat("\n\n") cat("Your initialization file is:\n>> ") cat(ini.file) cat("\nPress ENTER if it is correct or type new file name (ESC/Ctrl-C for exit):\n") key = scan(n=1,what="") key = ifelse(length(key)==0,ini.file,key) cat("\nTrying to open ",key," ... ") if (file.exists(key)){ cat("FOUND.\n") ini.file=key } else { cat("NOT FOUND! Redefine file location, please.\n") next } ## Validate INI file Info = parseINI(ini.file,correct.bslash=T) fields.need = c("Project$Title", "Search$GEOmetadb","Search$Platform","Search$Organism", "Search$Results","Import$DownloadTo","Import$CDF","Import$APT") fields.ini = character(0) for (i in 1:length(Info)) fields.ini=c(fields.ini,paste(names(Info)[i],names(Info[[i]]),sep="$")) if (sum(!(fields.need %in% fields.ini))>0) { cat("Some fields are missed in INI file:", fields.need[!(fields.need %in% fields.ini)],"\n") next } cat("INI file structure seems to be correct.\n") ## ToDo! Check APT tools ## ToDo! Check paths in INI - no spaces ## ToDo! Check that at least one include key is given break } # INI file check ## set default folder to the same folder as INI, if we are not there already if (length(grep("/",ini.file))>0){ setwd(sub("/[^/]+$","",ini.file)) }else{ ini.file = file.path(getwd(),ini.file) } ################################################################################ ## Libraries: check installed libraries and install what is necessary ################################################################################ selectOption = function (txt="Enter:",opt=c(letters,0:9)){ ans="" repeat{ cat("\n") cat(txt) ans = scan(n=1,what="") if (length(ans)==0)ans="" ans=casefold(ans) if (ans %in% opt) break cat("...not recognized. Please repeat\n") } return(ans) } installed = as.data.frame(installed.packages(), stringsAsFactors=F)$Package required = c("GEOquery","GEOmetadb","arrayQualityMetrics","R.utils") idx.to.install = which(!(required %in% installed)) if (length(idx.to.install)>0){ cat("The following libraries are not installed:",paste(required[idx.to.install],collapse=", "),"\n") ans = selectOption("Would you like to do this now? (Y/n): ",c("","y","n")) if (ans=="y"|ans=="") { source("http://bioconductor.org/biocLite.R") for (i in idx.to.install) biocLite(required[i]) }else{ cat("Ok. But they may be required for different steps of analysis later\n") } } ################################################################################ ## Operation Loop ################################################################################ while(T){ status = data.frame(matrix(-1,nc=3,nr=4)) names(status)=c("part","code","text") status$part=c("Search","Download","Preprocessing","QC") Meta=data.frame(matrix(nc=2,nr=2)) if (file.exists(Info$Search$Results)) { status$code[1] = 1 Meta=read.table(Info$Search$Results,header=T,sep="\t",comment.char="",quote="\"",as.is=T) }else status$code[1] = 0 if (length(grep("PuMaQC",names(Meta)))>0) status$code[2] = 1 if (file.exists(Info$Import$Results)) status$code[3] = 1; if (file.exists(Info$QC$Results)) status$code[4] = 1; for ( i in which(status$code== -1)){ if (status$code[i-1]==1) status$code[i]=0 } status$text = c("not allowed","not done","done")[status$code+2] while(T){ cat("\n\n") cat("========================================================\n") cat("PuMaQC is ready to work. Select one of the options:\n") cat(" 1. GEO Search (",status$text[1],")\n") cat(" 2. Data Download and Unpack (",status$text[2],")\n") cat(" 3. Data Preprocessing (",status$text[3],")\n") cat(" 4. Quality Control (",status$text[4],")\n") cat(" 5. Run complete pipeline (1-4)\n") cat(" 0. Exit (or ESC / Ctrl-C)") key = as.integer(selectOption("Choose action: ",0:5)) if (key==0) break if (key==5) break if (status$code[key]>=0) break cat("this action cannot be performed yet.\n") } if (key==0) break ##================= ## SEARCH if (key==1 | key==5) { cat("\n") cat("######################################################\n") cat("## PuMaQC: SEARCH ##\n") cat("######################################################\n") # load libraties library(GEOquery) library(GEOmetadb) setwd(sub("/[^/]+$","",ini.file)) Info = parseINI(ini.file, correct.bslash=T) # download GEOmetadb.sqlite file if it does not exist if (is.null(Info$Search$GEOmetadb)) Info$GEOmetadb="" if (file.exists(Info$Search$GEOmetadb)){ if (file.info(Info$Search$GEOmetadb)$size<2^30){ cat("\nIncomplete GEOmetadb.sqlite file is detected. Removing...\n\n") file.remove(Info$Search$GEOmetadb) } } if (!file.exists(Info$Search$GEOmetadb)){ try(getSQLiteFile(destdir = sub("/[^/]+$","",Info$Search$GEOmetadb), destfile = paste(sub("^.+/","",Info$Search$GEOmetadb),".gz",sep=""))) ## manual download # download.file("http://gbnci.abcc.ncifcrf.gov/geo/GEOmetadb.sqlite.gz",method="internal",destfile = "GEOmetadb.sqlite.gz",mode="wb") # R.utils::gzip("GEOmetadb.sqlite.gz", destname="GEOmetadb.sqlite", overwrite=F) } flush.console() try(dev.off(), silent = TRUE) try(PuMaQC.Search(ini.file,src.path)) try(dev.off(), silent = TRUE) warnings() cat("\nFinished.\nSee PuMaQC-1_Search.pdf for the report\n") cat("See and modify ",Info$Search$Results," file for manual data exclusion.\n") } ## IMPORT if (key==2 | key==5) { cat("\n") cat("######################################################\n") cat("## PuMaQC: Download ##\n") cat("######################################################\n") flush.console() try(dev.off(), silent = TRUE) try(PuMaQC.Download(ini.file,src.path)) try(dev.off(), silent = TRUE) warnings() cat("\nFinished.\n") } ## PREPROCESSING if (key==3 | key==5) { cat("\n") cat("######################################################\n") cat("## PuMaQC: PREPROCESSING ##\n") cat("######################################################\n") flush.console() try(dev.off(), silent = TRUE) try(PuMaQC.Preprocessing(ini.file,src.path)) try(dev.off(), silent = TRUE) warnings() cat("\nFinished.\n") } ## QC if (key==4 | key==5) { cat("\n") cat("######################################################\n") cat("## PuMaQC: QC ##\n") cat("######################################################\n") flush.console() try(dev.off(), silent = TRUE) try(PuMaQC.QC(ini.file,src.path)) try(dev.off(), silent = TRUE) warnings() cat("\nFinished.\n") } } # operating loop }