# figure 2
# effects of different normalization methods on basic properties
# Analysis of Illumina SNP data
##
##
##
## Set the working directory to the root of Analysis, ProcessedData and Rawdata
##
##
##
# Make the project root the working directory so that the relative
# "Analysis/..." paths used further down resolve.
setwd("/cn_data")
# Provides the SNP accessors and normalization routines called in this script.
library(beadarraySNP)
# Restores previously saved R objects. The rest of the script relies on
# `all.260` being among them (presumably a SnpSetIllumina object -- confirm
# against the contents of the .RData file).
load("Analysis/1280260raw.RData")
################################################################################
#  some functions
# Mean of the non-missing values of x.
# Returns NA unless more than 10 non-missing values are available.
my.mean <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) <= 10) {
    return(NA)
  }
  mean(observed)
}
# Standard deviation of the non-missing values of x.
# Returns NA unless more than 10 non-missing values are available.
my.sd <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) <= 10) {
    return(NA)
  }
  sd(observed)
}
#
# Summarise the "intensity" assay data of `snpdata` per chromosome.
#
# Arguments:
#   snpdata  object providing an "intensity" assayData element and reporter
#            annotation with a "CHR" column
#   FUN      summary function handed to aggregate(); it is applied to every
#            column of the intensity matrix within each chromosome group
#
# Returns the aggregate() result with its leading grouping column removed.
aggregateSNPint <- function(snpdata, FUN) {
  # Group by the chromosome annotation of the object being summarised rather
  # than that of the global `all.260`, so the grouping vector always has one
  # entry per row of the intensity matrix.
  chromosome <- numericCHR(reporterInfo(snpdata)[, "CHR"])
  intensity <- assayDataElement(snpdata, "intensity")
  aggregate(intensity, by = list(chromosome), FUN)[, -1]
}
#
# Median (despite the name) of all non-missing values found in the columns
# of x picked by `selection`.
mean.colselection <- function(x, selection) {
  chosen <- as.matrix(x[, selection])
  median(chosen, na.rm = TRUE)
}
#
# Median (despite the name) of all non-missing values of x picked by
# single-index subsetting with `selection` (e.g. a logical matrix that
# marks individual cells).
mean.selection <- function(x, selection) {
  chosen <- as.matrix(x[selection])
  median(chosen, na.rm = TRUE)
}
################################################################################
# Only include autosomal chromosomes
# Keep only the probes whose numeric chromosome code is below 90
# (presumably this drops the sex chromosomes -- confirm the numericCHR coding).
probes <- numericCHR(reporterInfo(all.260)[, "CHR"]) < 90
all.260 <- all.260[probes, ]

# Estimate the background of `snpdata` with `method` and subtract it.
subtract.background <- function(snpdata, method) {
  backgroundCorrect.SNP(backgroundEstimate(snpdata, method), "subtract")
}

# Background-subtracted versions of the filtered raw data.
data.bg.raw.min <- subtract.background(all.260, "minimum")
data.bg.raw.mode <- subtract.background(all.260, "mode")
data.bg.raw.anglemode <- subtract.background(all.260, "anglemode")

# The same three corrections applied after between-allele normalization.
quantiledata <- normalizeBetweenAlleles.SNP(all.260)
data.bg.qnt.min <- subtract.background(quantiledata, "minimum")
data.bg.qnt.mode <- subtract.background(quantiledata, "mode")
data.bg.qnt.anglemode <- subtract.background(quantiledata, "anglemode")

# The position in this list is what settings[, "basedata"] refers to, so the
# order of the elements must not change.
background.corrected <- list(
  raw = all.260,
  min = data.bg.raw.min,
  mode = data.bg.raw.mode,
  angmode = data.bg.raw.anglemode,
  quant = quantiledata,
  min.q = data.bg.qnt.min,
  mode.q = data.bg.qnt.mode,
  angmode.q = data.bg.qnt.anglemode
)

# One column per normalization run (22 runs), one row per parameter:
#   basedata   position of the input data set in `background.corrected`
#   normprob   `normprob` argument passed to normalizeWithinArrays.SNP
#   callscore  `callscore` argument passed to normalizeWithinArrays.SNP
#   useAll     `useAll` argument passed to normalizeWithinArrays.SNP
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
basedata  <- c(    1,     5,     1,     5,     2,     6,     2,     6,     3,     7,     3,     7,     4,     8,     4,     8,     5,     5,     5,     5,     5,     5)
normprob  <- c(  0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5,   0.5)
callscore <- c(  0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,   0.8,     0,   0.1,   0.2,   0.7,   0.8,   0.9)
useAll    <- c(FALSE, FALSE,  TRUE,  TRUE, FALSE, FALSE,  TRUE,  TRUE, FALSE, FALSE,  TRUE,  TRUE, FALSE, FALSE,  TRUE,  TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
settings <- data.frame(basedata, normprob, callscore, useAll)

# Run the within-array normalization followed by the per-locus normalization
# once for every row of `settings`; results[[i]] belongs to settings[i, ].
# The list is preallocated and indexed with seq_len() so that it is not
# re-copied on every iteration and an empty `settings` yields an empty list.
results <- vector("list", nrow(settings))
for (method in seq_len(nrow(settings))) {
  data.nrm <- normalizeWithinArrays.SNP(
    background.corrected[[settings[method, "basedata"]]],
    callscore = settings[method, "callscore"],
    normprob = settings[method, "normprob"],
    useAll = settings[method, "useAll"],
    relative = TRUE, fixed = FALSE, quantilepersample = TRUE
  )
  results[[method]] <- normalizeLoci.SNP(data.nrm, normalizeTo = 2)
}

# Category codes per chromosome (rows) for the annotated samples (columns).
categories <- read.table("Analysis/ChangedChromosomes.txt", sep = "\t",
                         header = TRUE, row.names = 1, as.is = TRUE)

# Per-chromosome mean and sd of the intensities, one entry per setting.
mean.results <- lapply(results, aggregateSNPint, my.mean)
sd.results <- lapply(results, aggregateSNPint, my.sd)

# Samples annotated with NorTum "N" and Isolate "B".
sample.info <- pData(all.260)
normalblood <- sample.info[, "NorTum"] == "N" & sample.info[, "Isolate"] == "B"

# Two stacked panels in one PDF; the device is closed at the end of the script.
pdf("figure2.pdf", width = 8, height = 8)
par(mfrow = c(2, 1), mar = c(0, 4, 0, 1))

# Shared plotting parameters.
linecol <- "grey"
symbolcolors <- rep("black", 4)

# Category matrix with the layout chromosomes x samples. Every cell starts as
# "u"; the four samples that have a column in `categories` get their codes.
# read.table() prefixed those column names with "X".
data.categories <- matrix("u", nrow = nrow(categories), ncol = ncol(all.260))
colnames(data.categories) <- sampleNames(all.260)
for (tumor in c("106TV", "108TV", "44TV", "514TV")) {
  data.categories[, tumor] <- as.character(categories[, paste("X", tumor, sep = "")])
}
sym.size <- 1.5
# Upper panel: for every setting, the median of the per-chromosome means over
# the normal-blood sample columns and over the cells coded "n", "a" and "d".
norm.mean <- unlist(lapply(mean.results, mean.colselection, normalblood))
tum.mean <- unlist(lapply(mean.results, mean.selection, data.categories == "n"))
tum.amp <- unlist(lapply(mean.results, mean.selection, data.categories == "a"))
tum.del <- unlist(lapply(mean.results, mean.selection, data.categories == "d"))

plot(tum.mean, ylim = c(1.4, 2.9), ylab = "mean", main = "", xaxt = "n",
     col = symbolcolors[1], pch = 0, cex = sym.size, las = 2)
# Horizontal reference lines and vertical separators between setting groups.
abline(h = c(1.5, 2, 2.5), col = linecol, lty = 3)
abline(v = c(4.5, 8.5, 12.5, 16.5), col = linecol)
points(norm.mean, col = symbolcolors[4], pch = 5, cex = sym.size)
points(tum.amp, col = symbolcolors[2], pch = 3, cex = sym.size)
points(tum.del, col = symbolcolors[3], pch = 4, cex = sym.size)
#
# Combined variability , legend
# Lower panel: the same four summaries as in the upper panel, taken from the
# per-chromosome standard deviations.
sd.tum.n <- unlist(lapply(sd.results, mean.selection, data.categories == "n"))
sd.norm <- unlist(lapply(sd.results, mean.colselection, normalblood))
sd.tum.a <- unlist(lapply(sd.results, mean.selection, data.categories == "a"))
sd.tum.d <- unlist(lapply(sd.results, mean.selection, data.categories == "d"))

plot(sd.tum.n, ylim = c(0.1, 1.2), ylab = "sd", main = "", xaxt = "n", yaxt = "n",
     col = symbolcolors[1], pch = 0, cex = sym.size)
points(sd.norm, col = symbolcolors[4], pch = 5, cex = sym.size)
points(sd.tum.a, col = symbolcolors[2], pch = 3, cex = sym.size)
points(sd.tum.d, col = symbolcolors[3], pch = 4, cex = sym.size)

# The y axis carries numeric ticks up to 0.7; the ticks from 0.9 upwards name
# the rows of the settings legend drawn in the top part of the panel.
axis(2, at = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 1.0, 1.1, 1.2),
     labels = c("0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "pGCS", "Qnt", "GT", "BG"),
     las = 2)
# Lines between the legend rows, a black line below the legend, and dotted
# reference lines in the data area.
abline(h = c(0.95, 1.05, 1.15), col = linecol)
abline(h = 0.85, col = "black")
abline(h = c(0.2, 0.4, 0.6), col = linecol, lty = 3)
# Label runs of identical consecutive values along the x axis of the current
# plot: each run gets one centred label, and a vertical separator is drawn at
# every boundary between two runs.
#
# Arguments:
#   txt         vector with one entry per x position 1, 2, ...; consecutive
#               equal entries form one run
#   ytxt        y coordinate at which the labels are drawn
#   ymin, ymax  vertical extent of the separator lines
#   col         colour of the separator lines; defaults to the global
#               `linecol`, so existing four-argument calls behave as before
#
# Returns (invisibly) a list with the drawn `labels`, their x positions `pos`
# and the x positions `lines` of the separators.
mergetext <- function(txt, ytxt, ymin, ymax, col = linecol) {
  # rle() only accepts plain atomic vectors.
  if (is.factor(txt)) {
    txt <- as.character(txt)
  }
  # Nothing to draw for empty input.
  if (length(txt) == 0) {
    return(invisible(list(labels = txt, pos = numeric(0), lines = numeric(0))))
  }
  runs <- rle(txt)
  run.end <- cumsum(runs$lengths)
  # A run covering positions (end - n + 1) .. end is centred at end + 0.5 - n/2.
  outpos <- run.end + 0.5 - runs$lengths / 2
  # Separators sit halfway between the last position of a run and the first
  # position of the next one; the final run has no separator after it.
  outlin <- run.end[-length(run.end)] + 0.5
  text(outpos, ytxt, runs$values)
  # A single run has no boundaries, so there is nothing to pass to segments().
  if (length(outlin) > 0) {
    segments(outlin, ymin, outlin, ymax, col = col)
  }
  invisible(list(labels = runs$values, pos = outpos, lines = outlin))
}
#
# Legend row "BG": basedata 1-4 and 5-8 cycle through the same four
# background methods, so fold the index back onto 1-4.
bgnames <- c("raw", "min", "mode", "angmode")[(settings[, "basedata"] - 1) %% 4 + 1]
mergetext(bgnames, 1.2, 1.15, 1.4)

# Legend row "GT": value of the useAll setting.
use.gt <- ifelse(settings[, "useAll"], "All", "Het")
mergetext(use.gt, 1.1, 1.05, 1.15)

# Legend row "Qnt": basedata entries above 4 are the ones derived from
# `quantiledata`.
bgquant <- ifelse(settings[, "basedata"] > 4, "Q", "-")
mergetext(bgquant, 1.0, 0.95, 1.05)

# Legend row "pGCS": the callscore setting.
mergetext(settings[, "callscore"], 0.9, 0.85, 0.95)

# Vertical separators between the setting groups in the data area, then
# close the PDF device.
group.breaks <- c(4.5, 8.5, 12.5, 16.5)
segments(group.breaks, 0, group.breaks, 0.85, col = linecol)
dev.off()

