# import raw data for "High Resolution Copy Number Analysis of Paraffin Embedded Archival Tissue Using SNP BeadArrays"
##
##
##
## Set the working directory to the root of Analysis, Processeddata and Rawdata
##
##
##
# Project root: every relative path used below ("Rawdata/...", "Analysis/...",
# "Processeddata/...") is resolved against this directory.
setwd("c:/cn_data")
# Loaded before the Illumina import steps that follow
# (presumably the provider of read.SnpSetIllumina and related helpers).
library(beadarraySNP)
# Append sample annotation to the phenotype data of a data object.
#
# Arguments:
#   object  - object supporting sampleNames(), pData() and phenoData<-()
#   targets - annotation table whose row names are sample names
# Value:
#   `object`, with the rows of `targets` that correspond to its samples bound
#   column-wise to its existing phenotype data.
# A warning is raised when a sample has no row in `targets`.
addPhenodata<-function(object,targets) {
  samples<-sampleNames(object)
  missing.samples<-setdiff(samples,rownames(targets))
  if (length(missing.samples)>0)
    warning("No phenodata for sample(s): ",paste(missing.samples,collapse=", "),call.=FALSE)
  # drop=FALSE keeps a single-column annotation table two-dimensional, so its
  # column name survives the cbind below instead of collapsing to a bare vector
  targets<-targets[samples,,drop=FALSE]
  phenoData(object)<-new("AnnotatedDataFrame",cbind(pData(object),targets))
  object
}

# Main dataset (1280260): read the four GenCall OPA files
opa1.260 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280260/GS_1280260_GT_11032005_opa1.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa2.260 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280260/GS_1280260_GT_11032005_opa2.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa3.260 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280260/GS_1280260_GT_11032005_opa3.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa4.260 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280260/GS_1280260_GT_11032005_opa4.csv",
  "Rawdata/Illumina/OPAinfo"
)
# Merge the four objects into a single one
all.260 <- combine(opa1.260, opa2.260, opa3.260, opa4.260)
# Quality filter: drop samples with median intensity below 1250 in either the
# Red or the Green channel, then drop low-quality probes
all.260 <- removeLowQualitySamples(all.260, 1250)
all.260 <- removeLowQualityProbes(all.260)
# Sort probes by chromosome, then by position
ind <- local({
  info <- reporterInfo(all.260)
  order(numericCHR(info[, "CHR"]), info[, "MapInfo"])
})
all.260 <- all.260[ind, ]
# Sample annotation
targets <- read.table(
  "Analysis/1280260targets.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
all.260 <- addPhenodata(all.260, targets)
save(all.260, file = "Analysis/1280260raw.RData")
#############################################
##  Low intensity replicate of main dataset (1280201)
opa1.201 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280201/GS_1280201_GT_06012005_opa1.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa2.201 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280201/GS_1280201_GT_06012005_opa2.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa3.201 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280201/GS_1280201_GT_06012005_opa3.csv",
  "Rawdata/Illumina/OPAinfo"
)
opa4.201 <- read.SnpSetIllumina(
  "Rawdata/Illumina/1280201/GS_1280201_GT_06012005_opa4.csv",
  "Rawdata/Illumina/OPAinfo"
)
# Merge the four objects into a single one
all.201 <- combine(opa1.201, opa2.201, opa3.201, opa4.201)
# Same quality filters as the main dataset, but with a sample threshold of 300
all.201 <- removeLowQualitySamples(all.201, 300)
all.201 <- removeLowQualityProbes(all.201)
# Sort probes by chromosome, then by position
ind <- local({
  info <- reporterInfo(all.201)
  order(numericCHR(info[, "CHR"]), info[, "MapInfo"])
})
all.201 <- all.201[ind, ]
# Sample annotation.
# NOTE(review): this reads the 1280260 targets file; presumably intended since
# this run is a replicate of the main dataset -- confirm.
targets <- read.table(
  "Analysis/1280260targets.txt",
  header = TRUE, row.names = 1, sep = "\t"
)
all.201 <- addPhenodata(all.201, targets)
save(all.201, file = "Analysis/1280201raw.RData")
#############################################
#############################################
# Affymetrix, dChip data
dc514 <- read.table(
  "Rawdata/Affymetrix/dCHIP_CN_514.txt",
  header = TRUE, quote = "", as.is = TRUE, row.names = 1
)
dc44.106.108 <- read.table(
  "Rawdata/Affymetrix/dCHIP_CN_44_106_108.txt",
  header = TRUE, quote = "", as.is = TRUE, row.names = 1
)
# Align the three-sample table to the row order of the 514 table, keep its
# columns 1, 2, 6, 8 and 10, and append column 8 of the 514 table
dc.all <- cbind(
  dc44.106.108[rownames(dc514), c(1, 2, 6, 8, 10)],
  dc514[, 8]
)
## positions in file seem to be divided by 1000000 compared to ill,bac
dc.all[[2]] <- dc.all[[2]] * 1e6
# Discard rows whose first column (chromosome) is "X"
dc.all <- dc.all[dc.all[[1]] != "X", ]
# Columns 3 to 6 hold the per-sample values
dc.data <- dc.all[, 3:6]
colnames(dc.data) <- c("44TV", "106TV", "108TV", "514TV")
# Columns 1 and 2 hold chromosome and position; chromosome is recoded
# with numericCHR()
dc.chrompos <- dc.all[, 1:2]
dc.chrompos[[1]] <- numericCHR(dc.chrompos[[1]])
colnames(dc.chrompos) <- c("CHR", "MapInfo")
save(dc.data, dc.chrompos, file = "Analysis/dchipCN.RData")
#############################################
#############################################
# BAC array: settings, chip layout and image import
excludePolymorphData <- TRUE
# level of genepix flags (0=exclude all flagged;-1000 include flagged spots)
excludeFlagsTreshold <- 0
library(limma)
targets <- readTargets(path = "Rawdata/BACarray")
# Retrieve chip-setup from the GAL file
gal <- readGAL("Rawdata/BACarray/CGH13.gal")
layout <- getLayout(gal)
layout$ndups <- 3
layout$spacing <- 1
# Annotation tables: locations, names to exclude, polymorphic names
locationData <- read.table(
  "Rawdata/BACarray/locationdata.txt",
  header = TRUE, sep = "\t", as.is = TRUE
)
excludeData <- read.table(
  "Rawdata/BACarray/excludedata.txt",
  header = TRUE, sep = "\t", as.is = TRUE
)
polymorphData <- read.table(
  "Rawdata/BACarray/polymorphdata.txt",
  header = TRUE, sep = "\t", as.is = TRUE
)
# Spot weight function: weight 0 for spots whose name is listed in excludeData
# (and, when requested, in polymorphData) or whose Flags value lies below the
# threshold; weight 1 otherwise
wt.fun <- if (excludePolymorphData) {
  function(gpr) {
    rejected <- gal[, "Name"] %in% excludeData[, 1] |
      gal[, "Name"] %in% polymorphData[, 1] |
      gpr[, "Flags"] < excludeFlagsTreshold
    as.numeric(!rejected)
  }
} else {
  function(gpr) {
    rejected <- gal[, "Name"] %in% excludeData[, 1] |
      gpr[, "Flags"] < excludeFlagsTreshold
    as.numeric(!rejected)
  }
}
RG <- read.maimages(
  targets$filename, "genepix",
  path = "Rawdata/BACarray",
  names = targets$name,
  wt.fun = wt.fun
)
RG$printer <- layout
####################################
# Normalization
# Background-correct with method "edwards", then normalize within each array
RG.bc<-backgroundCorrect(RG,method="edwards")
MA<-normalizeWithinArrays(RG.bc)
# remove low-intensity spots: weight becomes 0 wherever A is not above log2(200)
MA$weights<-MA$weights * as.numeric(MA$A>log2(200))
# Blank out M for every zero-weight spot. The element is spelled out in full
# (`weights`); the previous `MA$weight` only worked through partial matching
# of `$`, which is fragile and silently does nothing if no match is found.
MA$M[MA$weights==0]<-NA
# Summarise replicate spot values into a single value.
#
# Arguments:
#   x - numeric vector of replicate values, possibly containing NA
# Value:
#   The median of the non-missing values when at most one value is missing and
#   their standard deviation is below 0.1; NA otherwise.
#   NA is also returned when the standard deviation cannot be computed (fewer
#   than two non-missing values), because the variability cannot be judged;
#   previously that case aborted with "missing value where TRUE/FALSE needed".
spotaverage<-function(x) {
  # at max 1 NA
  if (sum(is.na(x))>=2) return(NA)
  # and have low variability
  spread<-sd(x,na.rm=TRUE)
  if (is.na(spread) || spread>=0.1) return(NA)
  median(x,na.rm=TRUE)
}
# Collapse one column of spot values to a single value per ID.
#
# Arguments:
#   x - vector of spot values, one per row of MA$genes (MA is taken from the
#       calling workspace, not passed in)
# Value:
#   Numeric vector named by ID; each entry is spotaverage() applied to the
#   values that share that ID.
aggcollumn <- function(x) {
  per.id <- aggregate(x, list(genes = MA$genes[, "ID"]), FUN = spotaverage)
  setNames(as.numeric(per.id[, 2]), per.id[, 1])
}
# Average the replicate spots per ID for every array (sign of M flipped first)
avg.copy <- apply(-MA$M, 2, aggcollumn)
# remove rows that are NA in all samples (probably excluded or blank)
emptyrows <- rowSums(!is.na(avg.copy)) == 0
avg.copy <- avg.copy[!emptyrows, ]
# Put the rows in the order in which their IDs occur in column 2 of locationData
orderednames <- locationData[locationData[, 2] %in% rownames(avg.copy), 2]
avg.copy.sort <- avg.copy[orderednames, ]
# Matching location records, indexed by the same IDs
loc.data <- locationData[locationData[, 2] %in% rownames(avg.copy.sort), ]
rownames(loc.data) <- loc.data[, 2]
# Remove probes for Z-chromosome
probes <- loc.data[, "Chromosome"] != "Z"
loc.data <- loc.data[probes, ]
avg.copy.sort <- avg.copy.sort[probes, ]
# Chromosome recoded with numericCHR(); position is the midpoint of
# columns 4 and 5 of the location table
chrompos.bac <- cbind(
  CHR = numericCHR(loc.data[, 3]),
  MapInfo = (as.numeric(loc.data[, 4]) + as.numeric(loc.data[, 5])) / 2
)
rownames(chrompos.bac) <- rownames(loc.data)
####################################
# Save data
# Put in linear scale: 2 * 2^value
cn.bac <- 2^(avg.copy.sort + 1)
save(cn.bac, chrompos.bac, file = "Analysis/bacdata.RData", compress = TRUE)
#
# Save processed data as ASCII (tab-separated) files
data.nrmp <- standardNormalization(all.260)
# Illumina: chromosome/position plus the intensity columns of the listed samples
write.table(
  cbind(
    reporterInfo(data.nrmp)[, c("CHR", "MapInfo")],
    assayData(data.nrmp)[["intensity"]][
      , c("44TV", "106TV", "108TV", "514TV",
          "44TP", "106TP", "108TP", "514TP",
          "44NB", "106NB", "108NB", "514NB")
    ]
  ),
  file = "Processeddata/illuminaCN.txt",
  sep = "\t"
)
# BAC array
write.table(
  cbind(chrompos.bac, cn.bac),
  file = "Processeddata/bacCN.txt",
  sep = "\t"
)
# Affymetrix (dChip)
write.table(
  cbind(dc.chrompos, dc.data),
  file = "Processeddata/affymetrixCN.txt",
  sep = "\t"
)
