library(GeneGroupAnalysisBetaCS)
library(breastCancerVDX)
library(hgu133a.db)


data(vdx,package="breastCancerVDX")
#--- Normalized expression data set
minn.data.expr=exprs(vdx)
pData(vdx)$er[1:4,]
minn.data.expr[1:4,]

dim(minn.data.expr)

#---- Check that the columns of the data is in the same order of the rows of the phenotype information
colnames(minn.data.expr)
all(colnames(minn.data.expr)==rownames(pData(vdx)))

#---- Ids of patients with ER status
er.pos=which(pData(vdx)$er==1)
er.neg=which(pData(vdx)$er==0)

#----- Data bases from MSidDB at the Broad Institute
#genes.db.gmt=readLines("c2.cp.reactome.v3.0.symbols.gmt")
#genes.db.gmt=readLines("c5.bp.v3.0.symbols.gmt")    
#genes.db.gmt=readLines("c5.mf.v3.0.symbols.gmt")
genes.db.gmt=readLines("c5.cc.v3.0.symbols.gmt")
genes.db.gmt[3]

genes.db.lista=lapply(genes.db.gmt,function(x){
	                 unlist(strsplit(x, "\t"))
                      })
genes.db.lista[1]


#--------- code
affy.ids.data=rownames(minn.data.expr)   #------ Affy ids of the data set
zz=as.list(hgu133aSYMBOL)                #------ Mapping between Gene Symbols and Affy identifiers        
gene.ids.data=unlist(zz[affy.ids.data])  #------ Gene symbols associated to the affy ids of the data set (same order)
gene.uncs.data=unique(gene.ids.data)     #------ Given repetitiosn, unique Gene Symbols
out=which(is.na(gene.uncs.data)==TRUE) #----- Some Affy identifiers do not map to Gene Symbols (NA), need to take them out
gene.uncs.data=gene.uncs.data[-out]       #------ Given repetitiosn, unique Gene Symbols


#--------- Obtaining the affy ids that map to the same Gene Symbol with max variability
gene.ids.uncs.data=GeneMaxVarFun(gene.uncs.data,gene.ids.data,minn.data.expr,er.pos,er.neg)

#---------- Convert the gene groups to row ids groups
gene.ids.grups=DBList2IDGrps(genes.db.lista,gene.uncs.data,gene.ids.uncs.data)

#----------- Groups and sizes diseired
wrkng.info=SizeDBGrps(gene.ids.grups,11)

names.db=lapply(genes.db.lista,function(x){
     	    res=x[[1]][1]
           })
names.db=unlist(names.db)


mcmc.data=MCMCData.cs(wrkng.info$groups.selected,gene.ids.grups,names.db,minn.data.expr,er.pos,er.neg)    
noRow=dim(mcmc.data$y.mu.a)[1]
noColA=dim(mcmc.data$y.mu.a)[2]
noColB=dim(mcmc.data$y.mu.b)[2]
numIt=50000
GrpSzs=wrkng.info$groups.size
YMuA=mcmc.data$y.mu.a
YMuB=mcmc.data$y.mu.b

parametroA=rnorm(length(wrkng.info$groups.size),0,1)
parametroB=rnorm(length(wrkng.info$groups.size),0,1)
parametroS=0.1
	
forma=3
scala=0.4      #0.4 CC      #0.4 MF              #0.4 BP     # 0.3  Reactome
ro=0.1
mm=0.55        # 0.55 CC    #0.55 MF            #0.65 BP       # 0.55 Reactome
aa.pi=10
Grps.apriori.diff.exp=floor(length(mcmc.data$proc.GO)*0.05)
	
often=100
burn.in=7000
cut.off=0.9
	
resultads=matrix(0,noRow,1)
aaa=proc.time()
cosa=GeneGroups.CS(noRow,noColA,noColB,numIt,GrpSzs,YMuA,YMuB,parametroA,parametroB,parametroS,forma,scala,ro,mm,aa.pi,Grps.apriori.diff.exp,often,burn.in,cut.off,resultads)
aaa=proc.time()-aaa
print(aaa)    
    
selec=which(cosa>cut.off)
mcmc.data$proc.GO[selec]

plot(1:length(cosa),cosa,type="h") 
abline(h=cut.off,col="red")


vignette("GeneGroupAnalysisBetaCS")


library(hgu133a.db)
library(annotate)

library("GO.db")
library(MASS)
library(genefilter)
library(annotate)
library(GO.db)
library(limma)