###########################################################
# Supplementary Online Material for "Graphical procedures
# for evaluating overall and subject-specific incremental
# values from new predictors with censored event time data."
# by Hajime Uno, Tianxi Cai, Lu Tian and LJ Wei
#
# cf. http://bcb.dfci.harvard.edu/~huno/R_code.html
###########################################################

#-- load the libraries --#
library(survival); library(sm); library(locfit); library(KernSmooth); library(quantreg)

#===========================================
shade <- function(x, yl, yu, ...)
# draws a shaded band between the curves (x, yl) and (x, yu)
{
  nl <- length(x)
  for(i in 1:(nl-1)){
    polygon(c(x[i], x[i], x[i+1], x[i+1]),
            c(yl[i], yu[i], yu[i+1], yl[i+1]), ...)
  }
}

#===========================================
VTM <- function(vc, dm){
# stacks the vector vc into a matrix with dm identical rows
  matrix(vc, ncol=length(vc), nrow=dm, byrow=T)
}

#===========================================
Kern.FUN <- function(zz, zi, bw, kern0=mykern)
## returns an (n x nz) matrix of kernel weights ##
## kern0 defaults to the global object 'mykern' if not supplied ##
{
  out <- (VTM(zz, length(zi)) - t(VTM(zi, length(zz))))/bw
  switch(kern0,
         "epan" = 0.75*(1-out^2)*(abs(out)<=1)/bw,
         "gauss"= dnorm(out)/bw)
}

#===========================================
Ghat.FUN <- function(tt, Ti, Di, type='fl', Gi=NULL)
# KM estimator of the censoring distribution
# tt ... distinct time points at which to evaluate
# Ti ... survival time
# Di ... status (1: event, 0: censored)
# Gi ... perturbation weights (random numbers with mean 1 and variance 1)
{
  tmpind <- rank(tt)
  if (is.null(Gi)){Gi <- rep(1, length(Ti))}
  summary(survfit(Surv(Ti, 1-Di)~1, se.fit=F, type=type, weights=Gi), sort(tt))$surv[tmpind]
}

#===========================================
WGT.FUN <- function(data, t0, Gi=NULL)
# IPCW (inverse probability of censoring weighting) calculation
# data ... 1st col = time, 2nd col = status
# t0 ..... a specific time of interest for prediction
# Gi ..... perturbation weights (random numbers with mean 1 and variance 1)
{
  Ti <- data[,1]; Di <- data[,2]
  Wi <- rep(0, length(Ti))
  Wi[Ti <= t0] <- Di[Ti<=t0]/Ghat.FUN(Ti[Ti<=t0], Ti, Di, Gi=Gi)
  Wi[Ti >  t0] <- 1/Ghat.FUN(t0, Ti, Di, Gi=Gi)
  Wi
}

#===========================================
Est.HR.FUN <- function(data, t0, Gi=NULL)
# fits a Cox model and returns the estimated risk of an event by t0 for each subject
{
  xi <- data[,1]; di <- data[,2]; zi <- data[,-(1:2),drop=F]; nn <- length(xi)
  if(is.null(Gi)){
    betahat   <- coxph(Surv(xi,di)~zi)
    LamCox.t  <- basehaz(betahat, center=FALSE)
    LamCox.t0 <- approx(LamCox.t$time, LamCox.t$hazard, t0)$y
  }else{
    betahat   <- coxph(Surv(xi[Gi!=0],di[Gi!=0])~zi[Gi!=0,], weights=Gi[Gi!=0])
    LamCox.t  <- basehaz(betahat, center=FALSE)
    LamCox.t0 <- approx(LamCox.t$time, LamCox.t$hazard, t0)$y
  }
  bzi <- zi%*%betahat$coef
  rs  <- 1-exp(-LamCox.t0*exp(bzi))
  #CoxRisk.u = exp(jump.u)*phat.km$LamCox.t0;
  return(list("risk"=rs, "bz"=bzi, "beta"=betahat$coef,
              "LamCox.t0"=LamCox.t0, "LamCox.t"=LamCox.t, "ft"=betahat))
}
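#===========================================
# Quick check (not part of the original material): a minimal sketch of how
# Est.HR.FUN and WGT.FUN can be called on simulated data.  The data-generating
# mechanism and the object names (sim.dat, ft.sim, Wi.sim) are illustrative
# assumptions only, not taken from the paper.
set.seed(1)
nn.sim  <- 200
z1      <- rnorm(nn.sim); z2 <- rbinom(nn.sim, 1, 0.5)
ev.time <- rexp(nn.sim, rate=0.10*exp(0.5*z1 + 0.3*z2))   # event times
cn.time <- rexp(nn.sim, rate=0.05)                        # censoring times
sim.dat <- cbind(time=pmin(ev.time, cn.time),
                 status=as.numeric(ev.time<=cn.time), z1, z2)
ft.sim  <- Est.HR.FUN(sim.dat, t0=5)    # risk of an event by t0=5 from a Cox fit
Wi.sim  <- WGT.FUN(sim.dat, t0=5)       # IPCW weights at t0=5
summary(ft.sim$risk); summary(Wi.sim)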
#================================================================
# Local Linear Quantile Regression
#================================================================
loc.qreg <- function(smdata, tau, bw, mykern="gauss", grid=300,
                     x.range=NULL, x.point=NULL, Gi=NULL){
  #--IN----------------#
  # smdata  : data[x, y, weight]
  # tau     : quantile to regress; 0.5 = median
  # bw      : bandwidth
  # mykern  : kernel function, "gauss" or "epan"
  # grid    : number of grid points
  # x.range : range of the index to smooth over
  # x.point : specific points at which to evaluate the fit
  # Gi      : perturbation weights (random numbers with mean 1 and variance 1)
  #--OUT---------------#
  # out     : (x.point, f_tau(x.point))
  #--------------------#
  n  <- nrow(smdata)
  xi <- smdata[,1]; yi <- smdata[,2]; wi <- smdata[,3]
  if (is.null(Gi)){Gi <- rep(1, n)}
  zz <- NULL
  if (!is.null(x.point)){zz <- x.point}
  if (is.null(x.point)){
    if (!is.null(x.range)){zz <- seq(min(x.range), max(x.range), length.out=grid)}
    else{zz <- quantile(xi, seq(0.03, 0.97, length=grid))}
  }
  nz    <- length(zz)
  myhat <- rep(0, nz)
  kern  <- Kern.FUN(zz, xi, bw, kern0=mykern)   ## (n x nz) matrix of kernel weights ##
  for (i in 1:nz){
    xi.adj   <- xi - zz[i]
    weight   <- wi*kern[,i]*Gi
    qrfit    <- rq(yi ~ xi.adj, tau=tau, method="br", weights=weight)
    ahat     <- qrfit$coefficients
    myhat[i] <- ahat[1]
  }
  out <- as.matrix(cbind(zz, myhat))
  return(out)
}

#======================================
check.fun <- function(z, tau){
# quantile-regression check (pinball) loss
  out <- z
  out[z>0] <- out[z>0]*tau
  out[z<0] <- out[z<0]*(tau-1)
  out
}

#======================================
L1Err <- function(mfun, tau, data.test){
# weighted check-loss prediction error of the fitted quantile curve mfun
  xx <- data.test[,1]; yy <- data.test[,2]; ww <- data.test[,3]
  approx.qft <- approxfun(unique(mfun))
  yy.hat <- approx.qft(xx)
  di <- (yy.hat - yy)
  L1 <- check.fun(di, tau)%*%ww
  #L1 <- tau*di[di>0]%*%ww[di>0] + (tau-1)*di[di<0]%*%ww[di<0]
  return(L1)
}

#=================================
PE.KFOLDCV.FUN <- function(bw.wk, mydata, tau, mykern, x.point, kfold){
#--- k-fold cross-validated prediction error for a working bandwidth bw.wk ---
  nn    <- nrow(mydata)
  kidx  <- rep(1:kfold, nn/kfold+1)[1:nn]
  min.x <- min(x.point); max.x <- max(x.point)
  idx   <- mydata[,1]>=min.x & mydata[,1]<=max.x
  pe.L1 <- 0
  for (k in 1:kfold){
    data.train <- mydata[kidx!=k,]
    data.test  <- mydata[kidx==k & idx,]   #--- do not look outside the smoothing range
    train.ft   <- loc.qreg(data.train, tau, bw.wk, mykern=mykern, x.point=x.point)
    pe.L1      <- pe.L1 + L1Err(train.ft, tau, data.test)
  }
  print(c(bw.wk, pe.L1))
  return(pe.L1)
}

#================================================================
# Local Linear Quantile Regression with Optimal Bandwidth
#================================================================
loc.qreg.opt <- function(smdata, tau, bw.ini, mykern, grid=300,
                         x.range=NULL, x.point=NULL, kfold=3, tol.err=0.0001){
  #--IN----------------#
  # smdata  : data[x, y, weight]
  # tau     : quantile to regress; 0.5 = median
  # bw.ini  : initial bandwidth (the search interval is (0, 10*bw.ini))
  # mykern  : kernel function, "gauss" or "epan"
  # grid    : (1x1) number of grid points
  # x.range : (2x1) range of the index to smooth over
  # x.point : specific points at which to evaluate the fit
  # kfold   : k for the k-fold cross-validation
  #--OUT---------------#
  # ft      : (x.point, f_tau(x.point))
  # bw.opt  : cross-validated bandwidth
  #--------------------#
  #--- range and points on the x-axis for smoothing ---#
  xi <- smdata[,1]; zz <- NULL
  if (!is.null(x.point)){zz <- x.point}
  if (is.null(x.point)){
    if (!is.null(x.range)){zz <- seq(min(x.range), max(x.range), length.out=grid)}
    else{zz <- quantile(xi, seq(0.03, 0.97, length=grid))}
  }
  #--- bandwidth optimization ---#
  L1.optimize <- optimize(PE.KFOLDCV.FUN, interval=c(0, bw.ini*10), mydata=smdata,
                          tau=tau, mykern=mykern, x.point=zz, kfold=kfold, tol=tol.err)
  bw.opt        <- L1.optimize$minimum
  min.objective <- L1.optimize$objective
  #--- fitting with bw.opt ---#
  ft <- loc.qreg(smdata, tau, bw.opt, mykern=mykern, x.point=zz)
  ##L1.err <- L1Err(ft, tau, smdata)
  return(list(ft=ft, bw.opt=bw.opt))
}
#=================================================================
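#================================================================
# Illustration (not part of the original material): a minimal sketch of
# loc.qreg on simulated data, to show the expected input layout [x, y, weight].
# The simulated curve, the fixed bandwidth 0.15 and the object names
# (sm.sim, fit.med.sim) are illustrative assumptions only; for a data-driven
# bandwidth one would call loc.qreg.opt with the same arguments.
set.seed(2)
x.sim   <- runif(300)
y.sim   <- sin(2*pi*x.sim) + rnorm(300)*0.3
sm.sim  <- cbind(x.sim, y.sim, rep(1, 300))              # unit weights
fit.med.sim <- loc.qreg(sm.sim, tau=0.5, bw=0.15, mykern="gauss",
                        x.point=seq(0.05, 0.95, length.out=50))
head(fit.med.sim)                                        # columns: x.point, fitted median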
##########################
# Example
##########################
#--- pbc data ---#
D <- subset(pbc, select=c("time","status","age","albumin","edema","protime","bili"))
D <- D[!is.na(apply(D,1,mean)),] ; dim(D)     # keep complete cases only
D$status <- as.numeric(D$status==2)           # 1: death, 0: censored/transplant
head(D)
indata1 <- D                                   # with the new predictor (bili)
indata0 <- D[,-7]                              # without the new predictor
n <- nrow(D) ; time <- D$time ; status <- D$status
t0 <- 365*5                                    # 5-year risk

#--- fit Cox models and construct IPCW weights ---#
ft.PBC1 <- Est.HR.FUN(as.matrix(indata1), t0)
ft.PBC0 <- Est.HR.FUN(as.matrix(indata0), t0)
Wi <- WGT.FUN(D, t0)
resp <- status ; resp[time>t0] <- 0
xx <- ft.PBC0$risk ; xx[xx>0.9999] <- 0.9999   # risk without the new predictor
yy <- ft.PBC1$risk ; yy[yy>0.9999] <- 0.9999   # risk with the new predictor
out.org  <- cbind(xx, yy, Wi, status)
case.org <- out.org[status==1 & Wi>0,]
cont.org <- out.org[status==0 & Wi>0,]
cens.org <- out.org[status==0 & Wi==0,]

#-- transform (beta'z scale) ---#
xx <- log(-log(xx)) ; yy <- log(-log(yy))
out  <- cbind(xx, yy, Wi, status)
case <- out[status==1 & Wi>0,]
cont <- out[status==0 & Wi>0,]
cens <- out[status==0 & Wi==0,]

#----------------------------------------------------------#
# Estimated Quantiles of D Given P1 For Cases and Controls #
#----------------------------------------------------------#
bw.ini <- var(xx); mykern <- "gauss"; grid <- 300; kfold <- 10
rcase <- quantile(case[,1], prob=c(0.03,0.97)) ; exp(-exp(rcase)) ; rcase
rcont <- quantile(cont[,1], prob=c(0.03,0.97)) ; exp(-exp(rcont)) ; rcont

tau <- 0.5
ft.PBC.case.50 <- loc.qreg.opt(case, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)
ft.PBC.cont.50 <- loc.qreg.opt(cont, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)
tau <- 0.25
ft.PBC.case.25 <- loc.qreg.opt(case, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)
ft.PBC.cont.25 <- loc.qreg.opt(cont, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)
tau <- 0.75
ft.PBC.case.75 <- loc.qreg.opt(case, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)
ft.PBC.cont.75 <- loc.qreg.opt(cont, tau, bw.ini, mykern, grid=grid, kfold=kfold, tol.err=0.0001)

PBC.fit <- list("fit50"=list(ft.PBC.case.50, ft.PBC.cont.50),
                "fit25"=list(ft.PBC.case.25, ft.PBC.cont.25),
                "fit75"=list(ft.PBC.case.75, ft.PBC.cont.75))
#save(PBC.fit, file="Sample-Fit.RData")

bw.case <- ft.PBC.case.50$bw.opt ; bw.case
bw.cont <- ft.PBC.cont.50$bw.opt ; bw.cont

################################
case.zz  <- ft.PBC.case.50$ft[,1]
case.med <- ft.PBC.case.50$ft[,2]
case.q25 <- ft.PBC.case.25$ft[,2]
case.q75 <- ft.PBC.case.75$ft[,2]
cont.zz  <- ft.PBC.cont.50$ft[,1]
cont.med <- ft.PBC.cont.50$ft[,2]
cont.q25 <- ft.PBC.cont.25$ft[,2]
cont.q75 <- ft.PBC.cont.75$ft[,2]
all.zz   <- c(case.zz, cont.zz)

#--- rescaling (back to the probability scale) ---#
case.plot.iqr <- exp(-exp(cbind(case.zz, case.med, case.q25, case.q75)))
cont.plot.iqr <- exp(-exp(cbind(cont.zz, cont.med, cont.q25, cont.q75)))

#--- difference in predicted risk (with vs. without the new predictor) ---#
case.plot.iqr.diff <- case.plot.iqr
case.plot.iqr.diff[,2] <- case.plot.iqr[,2] - case.plot.iqr[,1]
case.plot.iqr.diff[,3] <- case.plot.iqr[,3] - case.plot.iqr[,1]
case.plot.iqr.diff[,4] <- case.plot.iqr[,4] - case.plot.iqr[,1]
cont.plot.iqr.diff <- cont.plot.iqr
cont.plot.iqr.diff[,2] <- cont.plot.iqr[,2] - cont.plot.iqr[,1]
cont.plot.iqr.diff[,3] <- cont.plot.iqr[,3] - cont.plot.iqr[,1]
cont.plot.iqr.diff[,4] <- cont.plot.iqr[,4] - cont.plot.iqr[,1]
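#--- Optional check (not part of the original material): read off the
#--- estimated median incremental value at a few risk levels before plotting.
#--- The grid c(0.2, 0.5, 0.8) is an arbitrary illustration.
chk.p <- c(0.2, 0.5, 0.8)
cbind(p1 = chk.p,
      case.med.diff = approx(case.plot.iqr.diff[,1], case.plot.iqr.diff[,2], xout=chk.p)$y,
      cont.med.diff = approx(cont.plot.iqr.diff[,1], cont.plot.iqr.diff[,2], xout=chk.p)$y)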
#-------------------------------------------
# Graph
#-------------------------------------------
Lim <- c(0,1); YYlim <- c(-0.5,0.5)
Xlab <- expression(hat(p)[1])
Ylab <- expression(paste(hat(D)))

pdf(file="Sample-Figure.pdf", width=6, height=10)
par(mfrow=c(3,1), omi=c(0.4,0.4,0.2,0.1), mai=c(0.4, 0.3, 0.1, 0))

#--- (a) Median and IQR (With Event) ---#
plot(case.plot.iqr.diff[,1:2], type="l", lwd=3, xlim=Lim, ylim=YYlim,
     xlab="", ylab=Ylab, bty="l", xaxt="n")
shade(case.plot.iqr.diff[,1], case.plot.iqr.diff[,3], case.plot.iqr.diff[,4],
      col="lightgray", density=-1, border="lightgray", lwd=5)
lines(case.plot.iqr.diff[,1:2], lwd=3)
lines(c(0,1), c(0,0), lwd=1, col="darkgray")
mtext(side=3, text="(a)", font=2)
mtext(side=2, text=Ylab, line=3)
axis(1, at=seq(0,1,by=0.2), labels=rep("",6))

#--- (b) Median and IQR (Without Event) ---#
plot(cont.plot.iqr.diff[,1:2], type="l", lwd=3, xlim=Lim, ylim=YYlim,
     xlab="", ylab=Ylab, bty="l", xaxt="n")
shade(cont.plot.iqr.diff[,1], cont.plot.iqr.diff[,3], cont.plot.iqr.diff[,4],
      col="lightgray", density=-1, border="lightgray", lwd=5)
lines(cont.plot.iqr.diff[,1:2], lwd=3)
lines(c(0,1), c(0,0), lwd=1, col="darkgray")
mtext(side=3, text="(b)", font=2)
mtext(side=2, text=Ylab, line=3)
axis(1, at=seq(0,1,by=0.2), labels=rep("",6))

#--- (c) Relative frequency ---#
bw <- 0.4
all.xx   <- bkde(all.zz, bandwidth=bw)   # kernel density on the log(-log) scale
all.xx$x <- exp(-exp(all.xx$x))          # back to the probability scale
Ylim <- c(0, max(all.xx$y)+0.2)
plot(all.xx, type="l", lty=1, lwd=2, xlim=Lim, ylim=Ylim,
     xlab=Xlab, ylab="Relative frequency", bty="l")
mtext(side=3, text="(c)", font=2)
mtext(side=2, text="Relative frequency", line=3)
mtext(side=1, text=Xlab, outer=T, line=0.5)
dev.off()
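#===========================================
# Perturbation resampling (optional, not part of the original material):
# a minimal sketch of a single perturbation replicate.  The Gi arguments of
# Ghat.FUN, WGT.FUN, Est.HR.FUN and loc.qreg expect positive random weights
# with mean 1 and variance 1; the standard exponential draw below is one such
# choice and is an assumption here, as are the object names ending in ".b".
# Repeating these steps many times and taking empirical quantiles of the
# resulting curves is one way to gauge their sampling variability.
set.seed(123)
Gi.b  <- rexp(n)                                        # perturbation weights
ft1.b <- Est.HR.FUN(as.matrix(indata1), t0, Gi=Gi.b)    # perturbed risk, with new predictor
ft0.b <- Est.HR.FUN(as.matrix(indata0), t0, Gi=Gi.b)    # perturbed risk, without new predictor
Wi.b  <- WGT.FUN(D, t0, Gi=Gi.b)                        # perturbed IPCW weights
xx.b  <- log(-log(pmin(ft0.b$risk, 0.9999)))
yy.b  <- log(-log(pmin(ft1.b$risk, 0.9999)))
out.b  <- cbind(xx.b, yy.b, Wi.b, status)
case.b <- out.b[status==1 & Wi.b>0,]
#--- perturbed median curve for cases, reusing the cross-validated bandwidth ---#
ft.case.50.b <- loc.qreg(case.b, tau=0.5, bw=bw.case, mykern="gauss",
                         x.point=case.zz, Gi=Gi.b[status==1 & Wi.b>0])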