% NOTE -- ONLY EDIT THE .Rnw FILE!!! The .tex file is % likely to be overwritten. % %\VignetteIndexEntry{Boston Bioconductor 2011} %\VignetteDepends{made4} %\VignetteKeywords{Multivariate Analysis, Graphics, Expression Data, Microarray} %documentclass[12pt, a4paper]{article} \documentclass[12pt]{article} \usepackage{amsmath} \usepackage{times} \usepackage{hyperref} \usepackage[numbers,round]{natbib} \usepackage{Sweave} \renewcommand{\topfraction}{0.85} \renewcommand{\textfraction}{0.1} \textwidth=6.2in \textheight=8.5in %\parskip=.3cm \oddsidemargin=.1in \evensidemargin=.1in \headheight=-.3in %------------------------------------------------------------ % newcommand %------------------------------------------------------------ \newcommand{\scscst}{\scriptscriptstyle} \newcommand{\scst}{\scriptstyle} \newcommand{\Robject}[1]{{\texttt{#1}}} \newcommand{\Rfunction}[1]{{\texttt{#1}}} \newcommand{\Rclass}[1]{\textit{#1}} \newcommand{\Rpackage}[1]{\textit{#1}} \newcommand{\Rexpression}[1]{\texttt{#1}} \newcommand{\Rmethod}[1]{{\texttt{#1}}} \newcommand{\Rfunarg}[1]{{\texttt{#1}}} \begin{document} %------------------------------------------------------------ \title{Tutorial in Exploratory Data Analysis of Genomics Data} %------------------------------------------------------------ \author{Aed\'{i}n Culhane} \maketitle \tableofcontents %------------------------------------------------------------ \section{Introduction to the dataset for this tutorial} %------------------------------------------------------------ For the first part of this tutorial we will use a subset of the primate fibroblast gene expression from Karaman et al., Genome Research 2003. This study examines 3 groups, human, bonobo and gorilla expression profiles on Affymetrix HG\_U95Av2 chips \citep{Karaman2003}. 
This dataset contains 46 chips and is available in the Bioconductor library fibroEset (MAS5.0 data), and the web site \url{http://hacialab.usc.edu/supplement/karaman_etal_2003/index.html} (raw cel files). In this tutorial we will look at 9 chips which have been normalised using vsn. For information I have included details of how I normalised these, at the end of the tutorial. Download the normalized gene expression profiles from the web site (or Course wiki). The data are stored as a comma separated file, which is readable by MSExcel. <>== oldopt <- options(digits=3) options(width=60) on.exit( {options(oldopt)} ) @ %------------------------------------------------------------ \section{Task 1. Initial data Exploration} %------------------------------------------------------------ As we will be examining Affymetrix data, load the package \Rpackage{affy}. For exploratory analysis and ordination, we will use the package \Rpackage{made4}. <>== library(affy) library(made4) library(scatterplot3d) library(gplots) library(limma) library(annaffy) @ \Rpackage{made4} accepts gene expression data in a wide variety of input formats, including Bioconductor formats, \Rclass{AffyBatch}, \Rclass{ExpressionSet}, \Rclass{marrayRaw}, and \Robject{data.frame} or \Robject{matrix}. In this case the \Rpackage{vsn} normalised data are provided as a comma separated file. To load in R: <>= data.vsn<- read.csv("data.vsn.csv", as.is=TRUE, row.names=1) dim(data.vsn) @ The package \Rpackage{made4} contains a simple wrapper function, \Rfunction{overview} which will draw a dendrogram of hierarchical cluster analysis (1- Pearson Correlation distance metric, average linkage) of the samples \citep{Eisen1998}, a \Rfunction{boxplot} and histogram showing the distribution of the data. \begin{figure}[h!] \begin{center} <>= overview(data.vsn,labels=substring(colnames(data.vsn),1,5)) @ \caption{\label{figure 1} Overview of Fibroblast data. 
A) dendrogram showing results of average linkage clustering, B) boxplot and C) histogram. The 9 Samples are labelled with their colnames (array filenames), however substring was used to reduce the length of the colnames for clarity} \end{center} \end{figure} \clearpage %------------------------------------------------------------ \section{Task 2: Interpretation - labelling with covariates} %------------------------------------------------------------ Overview shows that we have 2 major (possibly 3) groups or clusters within the data. To interpret this exploratory clustering of the data, sample information is required. Read a text file (tab delimited) with sample information into R. The sample annotations are in the file annt.txt, which is on the course webpage/wiki. <>== annt<-read.table("annt.txt", header=TRUE) annt[1:2,] @ \Rfunction{read.table} reads in a table as a data.frame. The column headings are: <>== colnames(annt) @ This file contains the cel filenames (Cels), shorter names for the arrays (short.names), information about the Donor (Gorilla, Bonobo, Human), Age (years), Gender (male/female), doubling time (DT) of the cell lines, and information about whether cells were established from the same cell lines (estb.same). To view the data in a column in the \Robject{data.frame}, use the \$ symbol and the column label. \Rfunction{table} can also be used to tabulate a summary of a categorical vector. <>== annt$Donor table(annt$Donor) table(annt$Gender) @ Redraw the overview plot, but add label information about the Donor. \clearpage \begin{figure}[ht!] \begin{center} <>= overview(data.vsn, label=annt$Donor, classvec=annt$Donor) @ \caption{\label{figure 2} Overview of Fibroblast data. A) dendrogram showing results of average linkage clustering, B) boxplot and C) histogram. The 9 Samples are labelled by Donor.} \end{center} \end{figure} This is easier to interpret. It can be seen that humans cluster distinctly from other primates. 
But BEFORE we go ahead and search for genes distinguishing these, CHECK the other covariates: \begin{itemize} \item Is there a confounding covariate? \item Do the samples also group by Age, Gender, DT, estb.same? \item What do you think of the experimental design? How could it be improved? \item Have a look at the different plots. What do you think? \end{itemize} %------------------------------------------------------------ \section{Task 3: Ordination} %------------------------------------------------------------ %------------------------------------------------------------ \subsection{Correspondence Analysis} %------------------------------------------------------------ The function \Rfunction{ord} simplifies the running of ordination methods such as principal component, correspondence or non-symmetric correspondence analysis. It provides a wrapper which can call each of these methods. To run a correspondence analysis \citep{Fellenberg2001} on this dataset: <>== data.coa <-ord(data.vsn, type="coa") @ Have a look at data.coa. The ordination results are in \$ord. The row, column coordinates are \$li and \$co respectively. The eigenvalues are in \$eig. <>== data.coa$ord data.coa$ord$co[1:2,1:3] @ In a COA analysis the total \$eig will be equivalent to the total chi-sq of the table. To get the \% of variance explained by each axis: <>= data.coa$ord$eig*100/sum(data.coa$ord$eig) @ The cumulative variance is given by <>= cumsum(data.coa$ord$eig*100/sum(data.coa$ord$eig)) @ \noindent Therefore almost 57\% of the variance is captured by the first 2 components. %------------------------------------------------------------ \subsection{Correspondence Analysis- Visualization of Results} %------------------------------------------------------------ There are many functions in \Rpackage{made4} for visualizing results from ordination analysis. The simplest way to view results from \Rfunction{ord} is to use the function \Rfunction{plot}. 
This will draw a plot of the eigenvalues, along with plots of the variables (genes) and a plot of the cases (microarray samples). \begin{figure}[h!] \begin{center} <>== plot(data.coa, classvec=annt$Donor) @ \caption{\label{figure 3} Correspondence analysis plot. A. plot of the eigenvalues, B. projection of microarray samples (colored by Donor) C. projection of genes (gray filled circles) and D. biplot showing both genes and samples. Samples and genes with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.} \end{center} \end{figure} \pagebreak The distinction between species is captured on the first 2 eigenvectors. Principal component 1 (horizontal) defines the human versus the other primates, and PC2 captures the difference between the Bonobo (Ppa) and the other primates, human and gorilla. \clearpage A heatmap can be used to visualize the weights (or contributions) of genes or arrays to each principal component (or axis). \begin{figure}[h!] \begin{center} <>== heatplot(data.coa$ord$co, dend="none", labRow=annt$Donor) @ \caption{\label{figure 4} Heatmap of sample loadings in the new projection space. These allow easy visualization of which samples contribute to the variance on each new axis (or principal component).} \end{center} \end{figure} To plot the array projections from the COA: <>= plotarrays(data.coa$ord$co, classvec=annt$Donor) plotarrays(data.coa$ord$co, classvec=annt$Gender) @ The gene projections can also be visualised with \Rfunction{plotgenes}. The number of genes that are labelled at the end of the axis can be defined. The default is 10. <>= plotgenes(data.coa, n=5, col="red") @ Sometimes R may put an X in front of row names if they start with a number. Hence the names in ax1 don't agree with data. 
If you see this it is easy to remove the "X" in the names, \begin{verbatim} ax1<-sub("X", "", ax1) \end{verbatim} To extract a list of variables with greatest loadings or weights on an axis (i.e.\ those at the end of an axis), use \Rfunction{topgenes}. For example, to get a list of the 5 genes at the negative and positive ends of axis 1: <>== ax1<- topgenes(data.coa, axis=1, n=5) @ To get only a list of the genes at the positive end of the first axis: <>= genes.ax1<-topgenes(data.coa, end="pos", n=5) genes.ax1 @ Two lists can be compared using \Rfunction{comparelists}. \pagebreak It is useful to use boxplots to visualize the gene expression distributions of a gene in different sample groups. The distributions will be plotted using the order of levels(factor). In this example the order of annt\$Donor is Ggo, Hsa, Ppa. It would be more useful to plot Hsa, Ggo and then Ppa. Therefore reorder the levels of the factor. <>== annt$Donor spec = factor(annt$Donor, levels=c("Hsa","Ggo", "Ppa")) @ \begin{figure}[h!] \begin{center} <>== par(mfrow=c(2,1)) gene.pos1<-topgenes(data.coa, end="pos", n=1) df.PosGenes<-t(data.vsn[gene.pos1,]) boxplot(df.PosGenes~spec, col=getcol(3), main=paste(gene.pos1, "has greatest loading on positive end of ax1")) gene.neg1<-topgenes(data.coa, end="neg", n=1) df.NegGenes<-t(data.vsn[gene.neg1,]) boxplot(df.NegGenes~spec, col=getcol(3), main=paste(gene.neg1,"has greatest loading on negative end of ax1")) @ \caption{\label{figure 5} Boxplots of gene expression profiles of the probesets with greatest loadings on the positive and negative ends of axis 1} \end{center} \end{figure} \clearpage Make a heatmap and perform a cluster analysis of gene expression profiles of the 10 genes with highest weights (neg and pos) on axis 1. In Fig 6 we see that, while the human versus non-human primates difference is captured, the difference between the non-human primates is not well defined by axis 1. \begin{figure}[h!] 
\begin{center} <>== gene.pos.neg<-topgenes(data.coa, end="both", n=5) heatplot(data.vsn[gene.pos.neg,], labCol=as.character(annt$Donor)) @ \caption{\label{figure 6} Heatmap of gene expression profiles of genes with greatest loadings on the positive and negative ends of axis 1} \end{center} \end{figure} There are several ways to save an active plot or a plot you have just drawn, for example look at \Rfunction{dev.copy}. MSwindows users can also use the function \Rfunction{savePlot} \begin{verbatim} savePlot("heatplot_COA") \end{verbatim} \clearpage %------------------------------------------------------------ \subsection{PCA} %------------------------------------------------------------ We have run a Correspondence Analysis. Compare these results to a PCA: <>== data.pca <-ord(data.vsn, type="pca") data.pca$ord @ \begin{itemize} \item Compare the difference between the results from PCA and COA. \item How much variance is captured by each approach? \item Examine and compare plots from PCA and COA. \item In the PCA plots, do arrays segregate by Donor, Age or Gender? \end{itemize} <>== plotarrays(data.pca$ord$co, classvec=annt$Donor) plotgenes(data.pca) @ At this stage we need to get gene information in order to fully interpret our exploratory data analysis. %------------------------------------------------------------ \section{ Task 4: Annotating the plots with gene information } %------------------------------------------------------------ By default the variables (genes) are labelled with the rownames of the matrix. Typically these are spot IDs or Affymetrix accession numbers which are not very easy to interpret. Plots can be easily re-labeled. It is often useful to label genes with their HUGO gene symbols. We find the Bioconductor \Rpackage{annotate} and \Rpackage{annaffy} annotation packages are very useful for this. Alternatively we also use \Rpackage{biomaRt} or \Rpackage{Resourcerer} or the Stanford Source database. 
For this practical we will use \Rpackage{annaffy}, to get the Gene Symbol for all genes. We can then use these in plots. <>== library(annaffy) @ To get a list of the Unigene, LocusLink or descriptors for these genes, we can use the following. Remember help on annaffy can always be accessed by using ? and the command name or opening help in a web browser by typing \Rfunction{help.start()}. <>== affy.id <- rownames(data.vsn) aafUniGene(affy.id[1:10], "hgu95av2.db") aafLocusLink(affy.id[1:10], "hgu95av2.db") aafDescription(affy.id[1:10], "hgu95av2.db") @ These commands return a \Rclass{list}, but to make these into a character vector use the function \Rfunction{getText}: <>== getText(aafLocusLink(affy.id[1:10], "hgu95av2.db")) @ Get the list of all official (HUGO) gene symbols and re-plot the COA results. \begin{figure}[htbp] \begin{center} <>== affy.id <-rownames(data.vsn) affy.symbols<-aafSymbol(affy.id, "hgu95av2.db") affy.symbols <-getText(affy.symbols) plotgenes(data.coa, genelabels= affy.symbols, col="red", n=10) @ \caption{\label{figure 7} Projection of genes (filled circles) in Correspondence analysis. The genes at the ends of each of the axes are labelled with HUGO gene symbols. } \end{center} \end{figure} \clearpage \begin{itemize} \item Get the gene symbols for the \Rfunction{topgenes} from the first axis which were highly expressed in human but not the other primates. \item Which genes are highly expressed in each of the other primates? \item Are any of these genes also expressed in males or females (gender)? \item Redraw the heatmap but add gene symbols. \end{itemize} <>= topgenes(data.coa, labels=affy.symbols, end="neg", n=5) @ \noindent To obtain a browsable html table of gene annotation: <>== anncols<-aaf.handler() anncols anntable <- aafTableAnn(ax1, "hgu95av2.db", anncols) saveHTML(anntable, "example1.html", title = "Example") @ Have a look at a copy of this output. \begin{itemize} \item Which genes are part of the apoptosis pathway? 
\item How many genes are found on Chromosome 2? \item How many publications are there in PubMed on IGFBP2? \end{itemize} \newpage %------------------------------------------------------------ \section{ Advanced Tasks } %------------------------------------------------------------ If you have time, there are extra tasks. The code may also be useful to you in your own data analysis. %------------------------------------------------------------ \subsection{ Advanced Task 1: 3D Plots } %------------------------------------------------------------ To visualise the arrays (or genes) in 3D either use \Rfunction{do3d} or \Rfunction{html3D}. \Rfunction{do3d} is a wrapper for \Rfunction{scatterplot3d}, but is modified so that groups can be coloured. \Rfunction{html3D} produces a ``pdb'' output which can be visualised using rasmol or chime. Rasmol provides a free and very useful interface for colouring, rotating and zooming 3D graphs. <>= do3d(data.coa$ord$co, classvec=annt$Donor, cex.symbols=3) rotate3d(data.coa$ord$co, classvec=annt$Donor) html3D(data.coa$ord$co, classvec=annt$Donor, writehtml=TRUE) @ \Rfunction{html3D} produces a plot which can be rotated using chime or jmol. For an example see the course website. %------------------------------------------------------------ \subsection{ Advanced Task 2: Use limma to select genes and examine these using ordination and clustering } %------------------------------------------------------------ Several feature selection methods are available. We provide an empirical comparison of these in our paper \citep{Jeffery2006}, in which we recommend \Rpackage{limma} or Rank Products (available in the package \Rpackage{RankProd}) for feature selection. To perform a feature selection using limma, first generate a class vector, with 2 classes (eg human v other primate). 
<>= modelDonor<- model.matrix(~Donor,annt) lm.out<-lmFit(data.vsn, modelDonor) lm.out<-eBayes(lm.out) geneHsa<-topTable(lm.out, coef=2) geneHsa genePpa<-topTable(lm.out, coef=3) genePpa comparelists(geneHsa[,1], genePpa[,1]) heatplot(data.vsn[geneHsa[,1],], classvec=annt$Donor, labCol=annt$Donor) @ \clearpage %----------------------------- \subsection{Advanced Task 3: Comparing datasets (meta-analysis) using Coinertia Analysis} %----------------------------- Coinertia analysis has been applied to the cross-platform comparison (meta-analysis) of microarray gene expression datasets \citep{Culhane2003}. CIA is a multivariate method that identifies trends or co-relationships in multiple datasets which contain the same samples. That is, either the rows or the columns of a matrix must be "matchable". CIA can be applied to datasets where the number of variables (genes) far exceeds the number of samples (arrays), as is the case with microarray analyses. \Rfunction{cia} calls \Rfunction{coinertia} in the R package \Rpackage{ade4}. Let's examine two gene expression datasets of the same 60 cell lines. The NCI60 cell lines are a set of 60 cell lines with different tumour phenotypes (eg Breast, Colon, Leukemia, Prostate, CNS, lung cancer, ovarian, renal cancer etc). The gene expression of these cell lines has been examined by a number of groups \citep{Ross2000},\citep{Staunton2001}. The same 60 cell lines were analysed by different labs on different microarray platforms. We will compare one from Affymetrix (Staunton et al., 2001) and one that was obtained using Stanford spotted cDNA arrays (Ross et al., 2000) using \Rfunction{cia}. These 2 datasets were analyzed using \Rfunction{cia} by Culhane et al., 2003 \citep{Culhane2003}. These 2 datasets are available in the \Rpackage{made4} data package \Robject{NCI60}. The Ross dataset contains 1375 genes, and the affy dataset contains 1517. There is little overlap between the genes represented on these platforms. 
CIA allows visualisation of genes with similar expression patterns across platforms. <>= data(NCI60) summary(NCI60) names(NCI60) NCI60$classes[1:3,] table(NCI60$classes[,2]) coin <- cia(NCI60$Ross, NCI60$Affy) names(coin) coin$coinertia @ The RV coefficient \$RV, which is \Sexpr{signif(coin$coinertia$RV,3)} in this instance, is a measure of "global" similarity between the datasets. The closer to 1, on the scale 0--1, the greater the correlation between the two datasets. <>== coin$coinertia$RV @ To visually examine the cell lines that have similar or different gene expression profiles in these datasets, use \Rfunction{plotarrays}. \begin{figure}[h!] \begin{center} <>== plotarrays(coin, classvec=NCI60$classes[,2], lab="", cpoint=3) @ \caption{\label{figure 10} Coinertia analysis of NCI 60 cell line Spotted and Affymetrix gene expression datasets. Each cell line is colored by its phenotype (eg colon are green, breast are red, melanoma are pink etc). For each of the 60 cell lines, there are two coordinates (\$coinertia\$mX and \$coinertia\$mY). On the plot, these are visually shown as a closed circle and an arrow. These are joined by a line. If the profiles are similar they will be projected close together in the new space (ie joined by a short line). For more information see Culhane et al., BMC bioinformatics 2003.} \end{center} \end{figure} \clearpage If \Rfunction{plot} is used, the above plot together with the plots of the gene projections from each dataset can be visualized. \begin{figure}[h!] \begin{center} <>== plot(coin, classvec=NCI60$classes[,2]) @ \caption{\label{figure 11} Coinertia analysis of NCI 60 cell line Spotted and Affymetrix gene expression datasets. A) shows a plot of the 60 microarray samples projected onto the one space. The 60 circles represent dataset 1 (Ross) and the 60 arrows represent dataset 2 (affy). Each circle and arrow are joined by a line, the length of which is proportional to the divergence between that sample in the two datasets. 
The samples are coloured by cell type. B) The gene projections from dataset 1 (Ross), C) the gene projections from dataset 2 (Affy). Genes and samples projected in the same direction from the origin show genes that are expressed in those samples. } \end{center} \end{figure} \clearpage Coinertia analysis can be applied to other types of data including the integration of gene expression and transcription factor binding site data \citep{Jeffery2006b} or to the analysis of gene and protein expression data \citep{Fagan2007}. %------------------------------------------------------------ \section{vsn normalization of data.. for information only} %------------------------------------------------------------ For information only, please don't repeat this today. \Rpackage{vsn} was used to normalize the Affymetrix data. To produce the normalized data, the cel files were downloaded, and then in R, use File $\rightarrow$ Change directory (or \Rfunction{setwd}) to select the directory containing the cel files. Then \begin{verbatim} getwd() dir() library(affy) library(vsn) cels <- list.celfiles() data <- ReadAffy(filenames= cels) normalize.AffyBatch.methods <- c(normalize.AffyBatch.methods, "vsn") data.vsn <- es1 = expresso(data, bg.correct = FALSE, normalize.method = "vsn", pmcorrect.method = "pmonly", summary.method = "medianpolish") exprs2excel(data.vsn, file="data.vsn.csv") \end{verbatim} A tab delimited text file could also be saved using \begin{verbatim} write.exprs(data.vsn, file="data.rma.txt") \end{verbatim} %------------------------------------------------------------ \section{Further help} %------------------------------------------------------------ More information about \Rpackage{made4} is available at \url{http://www.bioconductor.org}. 
Extensive tutorials, examples and documentation on multivariate statistical methods are available from the ade4 website \url{http://pbil.univ-lyon1.fr/ADE-4} and \Rpackage{ade4} user support is available through the ADE4 mailing list \citep{Thioulouse1997}. The \Rpackage{ade4} homepage is \url{http://pbil.univ-lyon1.fr/ADE-4}. This tutorial assumes a basic knowledge of R; Emmanuel Paradis's \textbf{R for Beginners} is a good guide for those unfamiliar with R and is available at \url{http://cran.r-project.org/doc/contrib/Paradis-rdebuts_en.pdf}. For more examples and information on \Rpackage{made4}, please see: Culhane AC, Thioulouse J (2006) A multivariate approach to integrating datasets using made4 and ade4. \textbf{R News: Special Issue on Bioconductor} \textit{ Dec 2006} \url{http://cran.r-project.org/doc/Rnews/Rnews_2006-5.pdf} Culhane AC, Thioulouse J, Perriere G, Higgins DG. (2005) MADE4: an R package for multivariate analysis of gene expression data. \textit{Bioinformatics} \textbf{21(11):}2789-90. \newpage Information about this session: <>== sessionInfo() @ \bibliographystyle{plainnat} \begin{thebibliography}{13} \bibitem{Karaman2003} Karaman MW, Houck ML, Chemnick LG, Nagpal S, Chawannakul D, Sudano D, Pike BL, Ho VV, Ryder OA, Hacia JG \newblock Comparative analysis of gene-expression patterns in human and African great ape cultured fibroblasts. \newblock \textit{Genome Res.} \textbf{13(7)}:1619-30. 2003. \bibitem{Eisen1998} Eisen, M.B., Spellman, P.T., Brown, P.O., and Botstein, D. \newblock Cluster analysis and display of genome-wide expression patterns. \newblock \textit{Proc Natl Acad Sci U S A} \textbf{95}: 14863-14868. 1998. \bibitem{Fellenberg2001} Fellenberg, K., Hauser, N.C., Brors, B., Neutzner, A., Hoheisel, J.D., and Vingron, M. \newblock Correspondence analysis applied to microarray data. \newblock \textit{Proc Natl Acad Sci U S A} \textbf{98}: 10781-10786. 2001. \bibitem{Jeffery2006} Jeffery IB, Higgins DG, Culhane AC. 
Comparison and evaluation of microarray feature selection methods. \newblock \textit{BMC Bioinformatics} \textbf{7}:359. 2006. \bibitem{Tusher2001} Tusher VG, Tibshirani R, Chu G. \newblock Significance analysis of microarrays applied to the ionizing radiation response. \newblock \textit{Proc Natl Acad Sci U S A}.\textbf{98(9)}:5116-21. 2001. \bibitem{Thioulouse1997} Thioulouse,J., Chessel,D., Dol\'edec,S., and Olivier,J.M \newblock ADE-4: a multivariate analysis and graphical display software. \newblock \textit{Stat. Comput.}, \textbf{7}, 75-83. 1997. \bibitem{Culhane2002} Culhane, A.C., Perriere, G., Considine, E.C., Cotter, T.G., and Higgins, D.G. \newblock Between-group analysis of microarray data. \newblock \textit{Bioinformatics} \textbf{18}: 1600-1608. 2002. \bibitem{Khan2001} Khan,J., Wei,J.S., Ringner,M., Saal,L.H., Ladanyi,M., Westermann,F., Berthold,F., Schwab,M., Antonescu,C.R., Peterson,C. et al. \newblock Classification and diagnostic prediction of cancers using gene expression profiling and artificial neural networks. \newblock \textit{Nat. Med.} \textbf{7}:673-679. \bibitem{Culhane2003} Culhane AC, Perriere G, Higgins DG. \newblock Cross platform comparison and visualisation of gene expression data using co-inertia analysis. \newblock \textit{BMC Bioinformatics}.\textbf{4}:59. 2003. \bibitem{Ross2000} Ross DT, Scherf U, Eisen MB, Perou CM, Rees C, Spellman P, Iyer V, Jeffrey SS, Van de Rijn M, Waltham M, Pergamenschikov A, Lee JC, Lashkari D, Shalon D, Myers TG, Weinstein JN, Botstein D, Brown PO. \newblock Systematic variation in gene expression patterns in human cancer cell lines. \newblock \textit{Nat Genet} \textbf{24}:227-235 2000, \bibitem{Staunton2001} Staunton JE, Slonim DK, Coller HA, Tamayo P, Angelo MJ, Park J, Scherf U, Lee JK, Reinhold WO, Weinstein JN, Mesirov JP, Lander ES, Golub TR: \newblock Chemosensitivity prediction by transcriptional profiling. \newblock \textit{Proc Natl Acad Sci U S A} \textbf{98}:10787-10792. 
2001 \bibitem{Jeffery2006b} Jeffery IB,Madden SF,McGettigan PA,Perriere G, Culhane AC, Higgins DG \newblock Integrating transcription factor binding site information with gene expression datasets. \newblock \textit{Bioinformatics} \textbf{23(3)}:298-305.2006. \bibitem{Fagan2007} Fagan A, Culhane AC, Higgins DG. \newblock A multivariate analysis approach to the integration of proteomic and gene expression data. \newblock \textit{Proteomics} Jun 5 2007. \end{thebibliography} \end{document}