library(GeneGroupAnalysisBetaCS) library(breastCancerVDX) library(hgu133a.db) data(vdx,package="breastCancerVDX") #--- Normalized expression data set minn.data.expr=exprs(vdx) pData(vdx)$er[1:4,] minn.data.expr[1:4,] dim(minn.data.expr) #---- Check that the columns of the data is in the same order of the rows of the phenotype information colnames(minn.data.expr) all(colnames(minn.data.expr)==rownames(pData(vdx))) #---- Ids of patients with ER status er.pos=which(pData(vdx)$er==1) er.neg=which(pData(vdx)$er==0) #----- Data bases from MSidDB at the Broad Institute #genes.db.gmt=readLines("c2.cp.reactome.v3.0.symbols.gmt") #genes.db.gmt=readLines("c5.bp.v3.0.symbols.gmt") #genes.db.gmt=readLines("c5.mf.v3.0.symbols.gmt") genes.db.gmt=readLines("c5.cc.v3.0.symbols.gmt") genes.db.gmt[3] genes.db.lista=lapply(genes.db.gmt,function(x){ unlist(strsplit(x, "\t")) }) genes.db.lista[1] #--------- code affy.ids.data=rownames(minn.data.expr) #------ Affy ids of the data set zz=as.list(hgu133aSYMBOL) #------ Mapping between Gene Symbols and Affy identifiers gene.ids.data=unlist(zz[affy.ids.data]) #------ Gene symbols associated to the affy ids of the data set (same order) gene.uncs.data=unique(gene.ids.data) #------ Given repetitiosn, unique Gene Symbols out=which(is.na(gene.uncs.data)==TRUE) #----- Some Affy identifiers do not map to Gene Symbols (NA), need to take them out gene.uncs.data=gene.uncs.data[-out] #------ Given repetitiosn, unique Gene Symbols #--------- Obtaining the affy ids that map to the same Gene Symbol with max variability gene.ids.uncs.data=GeneMaxVarFun(gene.uncs.data,gene.ids.data,minn.data.expr,er.pos,er.neg) #---------- Convert the gene groups to row ids groups gene.ids.grups=DBList2IDGrps(genes.db.lista,gene.uncs.data,gene.ids.uncs.data) #----------- Groups and sizes diseired wrkng.info=SizeDBGrps(gene.ids.grups,11) names.db=lapply(genes.db.lista,function(x){ res=x[[1]][1] }) names.db=unlist(names.db) mcmc.data=MCMCData.cs(wrkng.info$groups.selected,gene.ids.grups,names.db,minn.data.expr,er.pos,er.neg) noRow=dim(mcmc.data$y.mu.a)[1] noColA=dim(mcmc.data$y.mu.a)[2] noColB=dim(mcmc.data$y.mu.b)[2] numIt=50000 GrpSzs=wrkng.info$groups.size YMuA=mcmc.data$y.mu.a YMuB=mcmc.data$y.mu.b parametroA=rnorm(length(wrkng.info$groups.size),0,1) parametroB=rnorm(length(wrkng.info$groups.size),0,1) parametroS=0.1 forma=3 scala=0.4 #0.4 CC #0.4 MF #0.4 BP # 0.3 Reactome ro=0.1 mm=0.55 # 0.55 CC #0.55 MF #0.65 BP # 0.55 Reactome aa.pi=10 Grps.apriori.diff.exp=floor(length(mcmc.data$proc.GO)*0.05) often=100 burn.in=7000 cut.off=0.9 resultads=matrix(0,noRow,1) aaa=proc.time() cosa=GeneGroups.CS(noRow,noColA,noColB,numIt,GrpSzs,YMuA,YMuB,parametroA,parametroB,parametroS,forma,scala,ro,mm,aa.pi,Grps.apriori.diff.exp,often,burn.in,cut.off,resultads) aaa=proc.time()-aaa print(aaa) selec=which(cosa>cut.off) mcmc.data$proc.GO[selec] plot(1:length(cosa),cosa,type="h") abline(h=cut.off,col="red") vignette("GeneGroupAnalysisBetaCS") library(hgu133a.db) library(annotate) library("GO.db") library(MASS) library(genefilter) library(annotate) library(GO.db) library(limma)