In this script we will investigate the association between Drug sensitivity and CNV status in the TCGA breast cancer samples....
Set the root directory to the location of the data. This must be modified for your own use (based on the location of the files).


```r
# Root of the project data tree. Keep the trailing slash: every path used below
# is built by pasting a relative path straight onto this string (sep="").
theRootDir <- "/mnt/data_scratch/finalData/"
```

1st try this on breast cancer samples, predicting from allSolidTumors


```r
library("pRRophetic")
```

Load BRCA CNV and IC50. The CNV data was created in "map_cnvs_to_genes.R", the expression data in "batch_correct_tcga_data.R" and the drug predictions in "getPredsOnAllTCGA_batchCorrData.R"


```r
# Restore the three precomputed inputs into the workspace. Object names listed
# per file are those given in the original notes.
for (rdataPath in c(
  "dataIn/tcga_cnv_subtracted/cnvsMappedToGenes/BRCA.RData", # theCnvQuantVecList_mat, tumorSamps
  "/dataIn/tenRuvNewStandardApproach.RData",                 # cancerTypesVec, tenRuvNewStandardApproach
  "dataOut/allDrugPredictions_mat.RData"                     # allDrugPredictions_mat, cancerTypesVec
)) {
  load(paste0(theRootDir, rdataPath))
}
# The prediction matrix uses "." inside its sample IDs; convert to "-" so the
# IDs can be split on "-" further down.
colnames(allDrugPredictions_mat) <- gsub(".", "-", colnames(allDrugPredictions_mat), fixed = TRUE)
```

Load the breast cancer CNV data and find samples for which we also have drug sensitivity predictions.


```r
# TCGA study abbreviations.
diseaseAbbrvs <- c(
  "ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "DLBC", "GBM",
  "HNSC", "KICH", "KIRC", "KIRP", "LAML", "LGG", "LIHC", "LUAD",
  "LUSC", "MESO", "OV", "PAAD", "PCPG", "PRAD", "READ", "SARC",
  "SKCM", "STAD", "TGCT", "THCA", "THYM", "UCEC", "UCS", "UVM"
)
# Keep only the CNV columns listed in tumorSamps.
tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps]
# Prediction sample IDs that occur verbatim among the CNV column names
# (the original note says these do not match).
oLapSamps <- colnames(allDrugPredictions_mat)[
  is.element(colnames(allDrugPredictions_mat), colnames(tumSampCnvMat))
]
```

Sample names do not match between the CNV and expression data, thus I need to match the IDs of the participants.


```r
# Sample IDs are "-"-delimited: field 3 is used as the patient ID and field 4
# as the sample code ("01A" = tumor sample, first replicate, per the original notes).
patPred <- sapply(strsplit(colnames(allDrugPredictions_mat), "-"), `[`, 3)
pat01A <- which(sapply(strsplit(colnames(allDrugPredictions_mat), "-"), `[`, 4) == "01A")
# Same patient-ID extraction for the CNV samples, which then become the CNV column names.
patCnv <- sapply(strsplit(colnames(tumSampCnvMat), "-"), `[`, 3)
colnames(tumSampCnvMat) <- patCnv
# CNV patients that have a "01A" prediction sample and a sample labelled BRCA.
brcaPatientsWith01A <- intersect(patPred[pat01A], patPred[cancerTypesVec == "BRCA"])
relevantPatients <- patCnv[patCnv %in% brcaPatientsWith01A]
```

Get the predictions for Lapatinib for only breast cancer patients, who are "01A" and have matched CNV data.


```r
# Columns of the prediction matrix that are both "01A" samples and labelled BRCA.
brcaTumorCols <- intersect(pat01A, which(cancerTypesVec == "BRCA"))
brca01aPreds <- allDrugPredictions_mat[, brcaTumorCols]
# Re-key the columns by patient ID (third "-"-separated field of the sample ID).
brca01aPreds_patNames <- sapply(strsplit(colnames(brca01aPreds), "-"), `[`, 3)
colnames(brca01aPreds) <- brca01aPreds_patNames
# Lapatinib predictions, ordered like relevantPatients.
brca_preds_with_cnvs <- brca01aPreds["Lapatinib", relevantPatients]
```

Now run t-tests and Wilcoxon rank sum tests for every amplification. Can the ERBB2 amplification be identified from these data?


```r
# For every gene (row of tumSampCnvMat) compare predicted Lapatinib response
# between amplified (CNV > 1) and non-amplified (CNV < 1) patients. Samples with
# CNV exactly 1 fall in neither group. Genes with 10 or fewer amplified samples
# are left as NA in every result vector except the two count vectors.
nGenes <- nrow(tumSampCnvMat)
pValsOut <- rep(NA_real_, nGenes)     # Spearman p-value against continuous CNV
pValsLm <- rep(NA_real_, nGenes)      # lm p-value for the gene, adjusting for continuous ERBB2 CNV
tTestAmp <- rep(NA_real_, nGenes)     # t-test p-value, amp vs notAmp
tTestDir <- rep(NA_real_, nGenes)     # mean(amp) - mean(notAmp)
pCondOnErbb2 <- rep(NA_real_, nGenes) # lm p-value for amp status, adjusting for ERBB2 amp status
wilcoxP <- rep(NA_real_, nGenes)      # Wilcoxon rank sum p-value, amp vs notAmp
wilcoxDir <- rep(NA_real_, nGenes)    # median(amp) - median(notAmp)
nAmp <- numeric(nGenes)
nNotAmp <- numeric(nGenes)

# Loop invariants: ERBB2 copy number and the predictions keyed by patient.
erbb2Cnv <- tumSampCnvMat["ERBB2", relevantPatients]
predsByPatient <- brca_preds_with_cnvs[relevantPatients]

op <- options(warn = (-1)) # suppress warnings while the tests run
for (i in seq_len(nGenes)) {
  geneCnv <- tumSampCnvMat[i, relevantPatients]
  amp <- which(geneCnv > 1)
  notAmp <- which(geneCnv < 1)
  nAmp[i] <- length(amp)
  nNotAmp[i] <- length(notAmp)

  # Positions, within c(amp, notAmp), of samples that are not ERBB2-amplified.
  erbb2NotAmp <- which(erbb2Cnv[c(amp, notAmp)] < 1)

  if (length(amp) > 10) {
    predsAmp <- predsByPatient[amp]
    predsNotAmp <- predsByPatient[notAmp]

    # p-value against continuous CNV; the original author flags this as likely not meaningful.
    pValsOut[i] <- cor.test(geneCnv, brca_preds_with_cnvs, method = "spearman")$p.value
    pValsLm[i] <- coef(summary(lm(brca_preds_with_cnvs ~ geneCnv + erbb2Cnv)))[2, 4]
    tTestAmp[i] <- t.test(predsAmp, predsNotAmp)$p.value
    tTestDir[i] <- mean(predsAmp) - mean(predsNotAmp)
    wilcoxP[i] <- wilcox.test(predsAmp, predsNotAmp)$p.value
    wilcoxDir[i] <- median(predsAmp) - median(predsNotAmp)

    # Two-group model with ERBB2 amplification status as a covariate.
    resp <- c(predsAmp, predsNotAmp)
    explor <- c(rep("amp", length(amp)), rep("notAmp", length(notAmp)))
    erbb2Amp <- rep("ampE", length(explor))
    erbb2Amp[erbb2NotAmp] <- "notAmpE"
    pCondOnErbb2[i] <- coef(summary(lm(resp ~ explor + erbb2Amp)))[2, 4]
  }
}
options(op) # restore the previous warning level so later chunks report warnings again
```

Assign names and calculate q values...


```r
# Label the per-gene result vectors with the gene names (CNV matrix row names).
pValsOut <- setNames(pValsOut, rownames(tumSampCnvMat))
pValsLm <- setNames(pValsLm, rownames(tumSampCnvMat))
# Benjamini-Hochberg adjustment of the t-test p-values; done before tTestAmp
# is named, as in the original statement order.
qTtest <- p.adjust(tTestAmp, method = "BH")
tTestAmp <- setNames(tTestAmp, rownames(tumSampCnvMat))
tTestDir <- setNames(tTestDir, rownames(tumSampCnvMat))
```

Create a table of some of the top results, we would expect ERBB2 to be near the top of this list.


```r
# Indices of the 40 genes with the smallest t-test p-values.
topByTtest <- order(tTestAmp)[1:40]
# One row per top gene: p-value, effect size, q-value and group sizes.
resMat <- cbind(
  PvalTests = tTestAmp[topByTtest],
  betaTtest = tTestDir[topByTtest],
  qvalTtest = qTtest[topByTtest],
  numAmp = nAmp[topByTtest],
  numNotAmp = nNotAmp[topByTtest]
)
print(resMat)
```

```
##             PvalTests  betaTtest    qvalTtest numAmp numNotAmp
## PNMT     2.131717e-13 -0.1855012 2.264368e-10    112       949
## TCAP     2.131717e-13 -0.1855012 2.264368e-10    112       949
## PGAP3    2.447084e-13 -0.1849778 2.264368e-10    112       947
## MIEN1    5.188451e-13 -0.1806118 3.600785e-10    111       950
## MIR4728  6.966988e-13 -0.1785517 3.653561e-10    112       950
## GRB7     7.896745e-13 -0.1806983 3.653561e-10    108       948
## PPP1R1B  1.833179e-12 -0.1844752 7.269863e-10    106       951
## STARD3   2.450930e-12 -0.1800699 8.504727e-10    107       947
## NEUROD2  8.477655e-12 -0.1804107 2.614886e-09    102       958
## ERBB2    4.293287e-11 -0.1901659 1.191816e-08     86       945
## ZPBP2    2.184755e-10 -0.1748457 5.513526e-08     94       962
## GSDMB    8.083387e-10 -0.1703977 1.869957e-07     91       967
## ORMDL3   9.255421e-10 -0.1728747 1.976388e-07     90       968
## IKZF3    1.045248e-09 -0.1789772 2.072577e-07     82       943
## LRRC3C   3.556775e-09 -0.1709207 6.390570e-07     86       974
## GSDMA    3.683326e-09 -0.1735671 6.390570e-07     83       974
## PSMD3    1.487876e-08 -0.1759700 2.429615e-06     77       978
## CSF3     2.350264e-08 -0.1688332 3.624630e-06     79       981
## SNORD124 7.642309e-08 -0.1656246 1.116582e-05     77       985
## CDK12    4.849501e-07 -0.1425511 6.731107e-05     76       955
## THRA     5.117412e-07 -0.1708247 6.764731e-05     64       986
## MED24    1.353596e-06 -0.1693292 1.707992e-04     62       972
## NR1D1    2.687171e-06 -0.1639679 3.243298e-04     65       993
## NXPH3    6.640119e-06 -0.1872953 7.680405e-04     44      1014
## MED1     8.178420e-06 -0.1286938 9.081317e-04     72       974
## RAPGEFL1 9.872701e-06 -0.1566107 1.034166e-03     54      1000
## FLJ40194 1.032013e-05 -0.1730648 1.034166e-03     48      1013
## NGFR     1.043107e-05 -0.1870641 1.034166e-03     43      1012
## PHOSPHO1 1.447507e-05 -0.1735327 1.350182e-03     47      1013
## EPN3     1.459130e-05 -0.1799692 1.350182e-03     42      1018
## CASC3    1.582769e-05 -0.1511061 1.417344e-03     57       999
## ABI3     1.897444e-05 -0.1746711 1.605102e-03     46      1011
## MSL1     1.908082e-05 -0.1443380 1.605102e-03     60       997
## GNGT2    2.024867e-05 -0.1739508 1.653245e-03     46      1015
## WIPF2    2.672387e-05 -0.1525884 2.119585e-03     42      1008
## HELZ     2.797258e-05 -0.1688949 2.156997e-03     34      1009
## SLC35B1  3.018034e-05 -0.1787134 2.259502e-03     41      1017
## ACSF2    3.114199e-05 -0.1777976 2.259502e-03     44      1010
## PHB      3.174373e-05 -0.1653610 2.259502e-03     47      1007
## ANKRD40  3.271864e-05 -0.1654006 2.270674e-03     43      1015
```

```r
# Gene symbols of the top t-test hits; used further down to look up their
# genomic coordinates. Note this character vector shares its name with the
# genes() accessor called later on the TxDb object.
genes <- rownames(resMat)
```

Show the top results if we use a Wilcoxon rank sum test instead: they are more or less the same.


```r
# Label the Wilcoxon results by gene, then show the 20 smallest p-values next
# to the corresponding difference in medians.
wilcoxDir <- setNames(wilcoxDir, rownames(tumSampCnvMat))
wilcoxP <- setNames(wilcoxP, rownames(tumSampCnvMat))
topByWilcox <- order(wilcoxP)[1:20]
print(cbind(wilcoxP[topByWilcox], wilcoxDir[topByWilcox]))
```

```
##                  [,1]       [,2]
## PNMT     2.806035e-13 -0.1689048
## TCAP     2.806035e-13 -0.1689048
## PGAP3    3.518291e-13 -0.1685115
## MIEN1    8.970819e-13 -0.1668479
## GRB7     1.260173e-12 -0.1696236
## MIR4728  1.484594e-12 -0.1642169
## PPP1R1B  1.764195e-12 -0.1685115
## STARD3   3.783053e-12 -0.1616436
## NEUROD2  7.384699e-12 -0.1633319
## ERBB2    2.054086e-11 -0.1748619
## ZPBP2    1.798013e-10 -0.1631121
## IKZF3    6.236899e-10 -0.1638376
## GSDMB    8.240603e-10 -0.1504689
## ORMDL3   8.741725e-10 -0.1562167
## GSDMA    2.641932e-09 -0.1503128
## LRRC3C   3.164589e-09 -0.1485649
## PSMD3    7.730517e-09 -0.1498768
## CSF3     1.614647e-08 -0.1598568
## SNORD124 6.287083e-08 -0.1485854
## THRA     6.140157e-07 -0.1608690
```

We have also created a set of results when we condition on ERBB2. When we do this, we identify the secondary drug target EGFR. We also identify a number of ABC transporters, which are known to be involved in multidrug resistance....


```r
# Label the ERBB2-conditioned p-values by gene and print the 20 smallest.
pCondOnErbb2 <- setNames(pCondOnErbb2, rownames(tumSampCnvMat))
pCondOnErbb2Ranked <- sort(pCondOnErbb2)
print(pCondOnErbb2Ranked[seq_len(20)])
```

```
##        ERBB2     EGFR-AS1        ABCA9        ABCA6        RAD52 
## 7.580633e-12 3.231632e-04 5.328181e-04 5.341892e-04 9.134247e-04 
##        ABCA8         PRB3         PRB1         PRB2         POP4 
## 1.446654e-03 1.554556e-03 1.557034e-03 1.557034e-03 2.287474e-03 
## RP11-434C1.1    LOC284395       ARNTL2     C12orf71        MED21 
## 2.918788e-03 3.482868e-03 3.894514e-03 3.929694e-03 3.939667e-03 
##     MIR4524A     MIR4524B       ABCA10       MYO18A        ETNK1 
## 4.024321e-03 4.024321e-03 4.047479e-03 4.060033e-03 4.222564e-03
```

Create a scatter plot of the predicted sensitivity Vs ERBB2 amplification.


```r
# Scatter of ERBB2 copy number (x) against predicted Lapatinib IC50 (y),
# written to an SVG file under figures/.
svg(paste0(theRootDir, "figures/erbb2VsLapatinib.svg"), width = 4, height = 4)
plot(
  x = tumSampCnvMat["ERBB2", relevantPatients],
  y = brca_preds_with_cnvs,
  pch = 20,
  col = "#00000044",
  xlab = "CNV",
  ylab = "Predicted Lapatinib IC50"
)
dev.off()
```

```
## png 
##   2
```

Create a plot showing that predicted lapatinib sensitivity increases with increasing copy number of ERBB2


```r
# Bin ERBB2 copy number into four groups: <1 -> 0, [1,2) -> 1, [2,3) -> 2,
# >=3 -> 3. findInterval() reproduces the original chain of threshold
# assignments, including values sitting exactly on 1, 2 or 3. The `[]`
# assignment keeps the patient names on the vector.
splitOnCnvNum <- tumSampCnvMat["ERBB2", relevantPatients]
splitOnCnvNum[] <- findInterval(splitOnCnvNum, c(1, 2, 3))

# Split on a factor with all four levels declared so that an empty bin still
# yields a (zero-length) group; otherwise split() would return fewer than four
# groups and the fixed labels below could not be assigned.
a <- split(brca_preds_with_cnvs, factor(splitOnCnvNum, levels = 0:3))
names(a) <- c("<1", "1-2", "2-3", ">3")

svg(paste0(theRootDir, "figures/erbb2VsLapatinib_bplot.svg"), width = 3, height = 4)
boxplot(
  a,
  col = c("#eff3ff", "#bdd7e7", "#6baed6", "#2171b5"),
  ylab = "Predicted Lapatinib Sensitivity",
  xlab = "Normalized Copy Number",
  pch = 20,
  cex.axis = .75,
  outcol = "#00000033"
)
dev.off()
```

```
## png 
##   2
```

Plot samples that are HER2 amplified or not amplified against PC1 and PC2 of the corresponding gene expression matrix. This is a supplementary figure.


```r
# Expression matrix restricted to the "01A" BRCA samples, re-keyed by patient ID.
# Here the sample IDs are split on a literal "." and the third field is taken.
brca01aExpr <- tenRuvNewStandardApproach[, intersect(pat01A, which(cancerTypesVec == "BRCA"))]
brca01aExpr_patNames <- sapply(strsplit(colnames(brca01aExpr), ".", fixed = TRUE), function(a) a[3])
colnames(brca01aExpr) <- brca01aExpr_patNames
brca_expr_with_cnvs <- brca01aExpr[, relevantPatients]

# PCA with samples as rows. pcOut already holds the score matrix (the $x
# element of the prcomp result), so it must not be indexed with $x a second
# time; doing so raised "$ operator is invalid for atomic vectors" and left
# thePcs undefined for the chunks below.
pcOut <- prcomp(t(brca_expr_with_cnvs))$x
thePcs <- pcOut
```

```
## Error in pcOut$x: $ operator is invalid for atomic vectors
```

```r
# 1 when a patient's ERBB2 copy number is above 1, otherwise 0. Unlike the
# amp/notAmp split used earlier, a value of exactly 1 is coded 0 here.
hasErbb2Amp <- as.numeric(tumSampCnvMat["ERBB2", relevantPatients] > 1)
```

Print the P-values for the association of ERBB2 amplification and of the PCs of the gene expression matrix.


```r
# Wilcoxon rank sum p-value for each of the first ten PCs, comparing
# ERBB2-amplified with non-amplified samples.
erbb2AmpRows <- which(hasErbb2Amp == 1)
erbb2NonAmpRows <- which(hasErbb2Amp == 0)
for (i in seq_len(10)) {
  pcScores <- thePcs[, i]
  print(wilcox.test(pcScores[erbb2AmpRows], pcScores[erbb2NonAmpRows])$p.value)
}
```

```
## Error in wilcox.test(thePcs[, i][which(hasErbb2Amp == 1)], thePcs[, i][which(hasErbb2Amp == : object 'thePcs' not found
```

Make a plot for the strongest associated PCs, that is PCs 4 and 5.


```r
# Point colour per sample: one colour for ERBB2-amplified (1), another for the rest (0).
thecols <- ifelse(hasErbb2Amp == 1, "#377eb899", "#4daf4a99")
svg(paste0(theRootDir, "figures/erbb2AgainstExpressionPcs.svg"), width = 3, height = 4)
plot(
  thePcs[, 4],
  thePcs[, 5],
  col = thecols,
  pch = 1,
  xlab = "PC4",
  ylab = "PC5",
  cex.axis = .7,
  las = 1
)
```

```
## Error in plot(thePcs[, 4], thePcs[, 5], col = thecols, pch = 1, xlab = "PC4", : object 'thePcs' not found
```

```r
# Key for the PC4/PC5 scatter; the two fill colours are the same hex codes
# used for the point colours in thecols.
legend("bottomleft", inset=.05, title="HER2 Status", c("Amplified","Not Amplified"), fill=c("#377eb899", "#4daf4a99"), cex=0.5)
```

```
## Error in strwidth(legend, units = "user", cex = cex, font = text.font): plot.new has not been called yet
```

```r
# Close the SVG device opened for the PC scatter plot.
dev.off()
```

```
## png 
##   2
```

Plot the effect size in the region around the ERBB2 locus. Using this approach we can identify ERBB2 as the causative gene in this locus.


```r
# Gene coordinates are taken from TxDb.Hsapiens.UCSC.hg19.knownGene, the
# annotation the original notes say was used for the CNV -> gene mapping.
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
library(GenomicFeatures)
# `genes` is also a character vector in this workspace; in call position R
# still resolves the name to the accessor function.
geneRanges <- genes(txdb)
library(org.Hs.eg.db)
# Lookup from Entrez gene ID to gene symbol, attached to the ranges as gene_sym.
e2s <- toTable(org.Hs.egSYMBOL)
syms <- setNames(e2s[, "symbol"], e2s[, "gene_id"])
mcols(geneRanges)$gene_sym <- syms[as.character(mcols(geneRanges)$gene_id)]
save(geneRanges, file = paste0(theRootDir, "dataIn/geneRangesHg19.RData"))

# Flat table of ranges keyed by gene symbol; rows without a symbol are dropped first.
theDf <- as.data.frame(geneRanges)
theDf_filt <- theDf[!is.na(theDf$gene_sym), ]
rownames(theDf_filt) <- theDf_filt$gene_sym
locCols <- c("gene_sym", "seqnames", "start", "end")
resLocs_ <- theDf_filt[genes, locCols]
# Keep genes starting below 45 Mb, i.e. the window around ERBB2.
# NOTE(review): this filters on position only, not on chromosome - confirm
# every gene in `genes` is on chr17.
resLocs_2 <- resLocs_[abs(resLocs_$start) < 4.5e7, ]
save(resLocs_2, file = paste0(theRootDir, "dataOut/resLocs_2.RData")) # reused for the same plot in the GDSC cell line dataset

# Per-gene plotting vectors: coordinates in bases, effect size (resMat column 2)
# and t-test p-value (resMat column 1).
startVec <- abs(resLocs_2$start)
endVec <- abs(resLocs_2$end)
midVec <- ((startVec + endVec) / 2)
yVec <- resMat[resLocs_2$gene_sym, 2]
textVec <- names(yVec)
pVec <- resMat[resLocs_2$gene_sym, 1]
segmentsMat <- cbind(startVec, yVec, endVec, yVec)

# Plot with one horizontal segment per gene spanning its length.
basesPerMb <- 1000000
svg(paste0(theRootDir, "figures/erbb2LocSegs.svg"), width = 5, height = 4)
# White points only establish the axes; the genes are drawn as segments below.
plot(
  startVec / basesPerMb,
  yVec,
  xlab = "Chromosome 17 Location (megabases)",
  ylab = "Effect Size",
  las = 1,
  pch = 20,
  col = "#ffffff",
  cex.axis = .7
)
segments(startVec / basesPerMb, yVec, endVec / basesPerMb, yVec)
# Small vertical ticks at both ends of each gene.
segments(startVec / basesPerMb, yVec + 0.0005, startVec / basesPerMb, yVec - 0.0005)
segments(endVec / basesPerMb, yVec + 0.0005, endVec / basesPerMb, yVec - 0.0005)
text(midVec / basesPerMb, yVec + 0.0013, textVec, cex = .3)
# lines(xLine/1000000, yLine)
dev.off()
```

```
## png 
##   2
```

```r
# Same locus plot, but with a single point at each gene's midpoint; the
# original author considered this version the clearer one.
svg(paste0(theRootDir, "figures/erbb2LocPoints.svg"), width = 5, height = 4)
plot(
  midVec / 1000000,
  yVec,
  xlab = "Chromosome 17 Location (megabases)",
  ylab = "Effect Size",
  las = 1,
  pch = 20,
  col = "#00000099",
  cex.axis = .7
)
text(midVec / 1000000, yVec + 0.0013, textVec, cex = .3)
dev.off()
```

```
## png 
##   2
```

```r
# ggplot2 version of the locus plot with -log10(p) shown as the point fill
# (pch 21 = bordered, fillable circle).
library(ggplot2)
dat <- data.frame(x=midVec/1000000, y=yVec, Drug=textVec, pVal=-log10(pVec))
# Written to its own file: the borderless variant produced next writes
# "figures/Erbb2Loc_points.svg", which previously overwrote this figure.
svg(paste0(theRootDir, "figures/Erbb2Loc_points_bordered.svg"), width=6, height=4)
ggplot(data=dat, aes(x=x, y=y)) +
  theme_bw() +
  geom_point(aes(fill=pVal), size=I(3), pch=21) +
  geom_text(aes(label=Drug), vjust=-.5, hjust=-.24, size=2.5, angle=15) +
  ylab("Lapatinib Effect Size for HER2+ vs HER2-") +
  xlab("Chromosome 17 Location (megabases)") +
  # The points map `fill`, so the gradient and legend title must be set on the
  # fill scale; a colour scale has no effect on this layer.
  scale_fill_gradient(low="steelblue4", high="tomato2", name="-Log10 P-value") +
  theme(legend.position=c(.9,.2))
dev.off()
```

```
## png 
##   2
```

```r
# Variant of the locus plot without point borders: -log10(p) drives the point
# colour. Some points may be harder to see than in the bordered version.
library(ggplot2)
dat <- data.frame(
  x = midVec / 1000000,
  y = yVec,
  Drug = textVec,
  pVal = -log10(pVec)
)
svg(paste0(theRootDir, "figures/Erbb2Loc_points.svg"), width = 7, height = 5)
ggplot(data = dat, aes(x = x, y = y)) +
  theme_bw() +
  geom_point(aes(color = pVal), size = I(3)) +
  geom_text(aes(label = Drug), vjust = -.5, hjust = -.24, size = 2.5, angle = 15) +
  ylab("Lapatinib Effect Size for HER2+ vs HER2-") +
  xlab("Chromosome 17 Location (megabases)") +
  scale_color_continuous(low = "steelblue4", high = "tomato2", name = "-Log10 P-value") +
  theme(legend.position = c(.9, .2))
dev.off()
```

```
## png 
##   2
```

Apply predictions across all CNVs and drugs for breast cancer samples. Can we identify any novel associations!?


```r
# Drug identifiers to test (138 entries). doPredict() uses each one as a row
# name into the drug-prediction matrix, so the spelling must match its row names.
possibleDrugs <- c("A.443654", "A.770041", "ABT.263", "ABT.888", "AG.014699", "AICAR", "AKT.inhibitor.VIII", "AMG.706", "AP.24534", "AS601245", "ATRA", "AUY922", "Axitinib", "AZ628", "AZD.0530", "AZD.2281", "AZD6244", "AZD6482", "AZD7762", "AZD8055", "BAY.61.3606", "Bexarotene", "BI.2536", "BIBW2992", "Bicalutamide", "BI.D1870", "BIRB.0796", "Bleomycin", "BMS.509744", "BMS.536924", "BMS.708163", "BMS.754807", "Bortezomib", "Bosutinib", "Bryostatin.1", "BX.795", "Camptothecin", "CCT007093", "CCT018159", "CEP.701", "CGP.082996", "CGP.60474", "CHIR.99021", "CI.1040", "Cisplatin", "CMK", "Cyclopamine", "Cytarabine", "Dasatinib", "DMOG", "Docetaxel", "Doxorubicin", "EHT.1864", "Elesclomol", "Embelin", "Epothilone.B", "Erlotinib", "Etoposide", "FH535", "FTI.277", "GDC.0449", "GDC0941", "Gefitinib", "Gemcitabine", "GNF.2", "GSK269962A", "GSK.650394", "GW.441756", "GW843682X", "Imatinib", "IPA.3", "JNJ.26854165", "JNK.9L", "JNK.Inhibitor.VIII", "JW.7.52.1", "KIN001.135", "KU.55933", "Lapatinib", "Lenalidomide", "LFM.A13", "Metformin", "Methotrexate", "MG.132", "Midostaurin", "Mitomycin.C", "MK.2206", "MS.275", "Nilotinib", "NSC.87877", "NU.7441", "Nutlin.3a", "NVP.BEZ235", "NVP.TAE684", "Obatoclax.Mesylate", "OSI.906", "PAC.1", "Paclitaxel", "Parthenolide", "Pazopanib", "PD.0325901", "PD.0332991", "PD.173074", "PF.02341066", "PF.4708671", "PF.562271", "PHA.665752", "PLX4720", "Pyrimethamine", "QS11", "Rapamycin", "RDEA119", "RO.3306", "Roscovitine", "Salubrinal", "SB.216763", "SB590885", "Shikonin", "SL.0101.1", "Sorafenib", "S.Trityl.L.cysteine", "Sunitinib", "Temsirolimus", "Thapsigargin", "Tipifarnib", "TW.37", "Vinblastine", "Vinorelbine", "Vorinostat", "VX.680", "VX.702", "WH.4.023", "WO2009093972", "WZ.1.84", "X17.AAG", "X681640", "XMD8.85", "Z.LLNle.CHO", "ZM.447439")
# Gene names in the row order of the full CNV matrix.
theGeneNames <- rownames(theCnvQuantVecList_mat)
# Test every gene's amplification status against the predictions for one drug.
#
# Args:
#   k: index into the global `possibleDrugs`; selects the drug (row) to test.
#   theDrugPredictions: matrix of drug predictions with drug names as row
#     names and "-"-delimited sample IDs as column names.
#
# Returns:
#   Numeric matrix with one row per gene (row names = row names of the CNV
#   matrix) and columns tTestAmp, tTestDir, wilcoxP, wilcoxDir, numAmpVec,
#   numNotAmpVec. Genes with 5 or fewer amplified samples are all-NA rows.
#   This now includes untested genes at the end of the matrix, which used to
#   be dropped and then broke the hard-coded 23046 row names.
#
# Reads the globals possibleDrugs, theCnvQuantVecList_mat, tumorSamps and
# cancerTypesVec. Prints k as a progress marker.
doPredict <- function(k, theDrugPredictions) {
  predOnAll_residualized <- theDrugPredictions[possibleDrugs[k], ]

  # CNV data restricted to the samples listed in tumorSamps.
  tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps]

  # Match participants in both datasets: field 3 of the sample ID is the
  # patient, field 4 the sample code.
  barcodeFields <- strsplit(names(predOnAll_residualized), "-")
  patPred <- sapply(barcodeFields, function(a) a[3])
  pat01A <- which(sapply(barcodeFields, function(a) a[4]) == "01A")
  patCnv <- sapply(strsplit(colnames(tumSampCnvMat), "-"), function(a) a[3])
  colnames(tumSampCnvMat) <- patCnv

  # CNV patients with a "01A" prediction sample and a sample labelled BRCA.
  relevantPatients <- patCnv[patCnv %in% intersect(patPred[pat01A], patPred[cancerTypesVec == "BRCA"])]

  brca01aPreds <- predOnAll_residualized[intersect(pat01A, which(cancerTypesVec == "BRCA"))]
  names(brca01aPreds) <- sapply(strsplit(names(brca01aPreds), "-"), function(a) a[3])

  brca_preds_with_cnvs <- brca01aPreds[relevantPatients]
  predsByPatient <- brca_preds_with_cnvs[relevantPatients]

  # Preallocate one row per gene so untested genes stay NA instead of
  # shortening the result.
  nGenes <- nrow(tumSampCnvMat)
  outMat <- matrix(
    NA_real_,
    nrow = nGenes,
    ncol = 6,
    dimnames = list(
      rownames(tumSampCnvMat),
      c("tTestAmp", "tTestDir", "wilcoxP", "wilcoxDir", "numAmpVec", "numNotAmpVec")
    )
  )

  for (i in seq_len(nGenes)) {
    geneCnv <- tumSampCnvMat[i, relevantPatients]
    amp <- which(geneCnv > 1)
    notAmp <- which(geneCnv < 1)

    if (length(amp) > 5) {
      predsAmp <- predsByPatient[amp]
      predsNotAmp <- predsByPatient[notAmp]
      outMat[i, "tTestAmp"] <- t.test(predsAmp, predsNotAmp)$p.value
      outMat[i, "tTestDir"] <- mean(predsAmp) - mean(predsNotAmp)
      outMat[i, "wilcoxP"] <- wilcox.test(predsAmp, predsNotAmp)$p.value
      outMat[i, "wilcoxDir"] <- median(predsAmp) - median(predsNotAmp)
      outMat[i, "numAmpVec"] <- length(amp)
      outMat[i, "numNotAmpVec"] <- length(notAmp)
    }
  }

  print(k)
  outMat
}
library("parallel")
# Run doPredict() once per drug across 12 forked workers. The index range is
# derived from possibleDrugs rather than hard-coded so the two cannot drift apart.
allBrcaCnaAssocs <- mclapply(seq_along(possibleDrugs), doPredict, allDrugPredictions_mat, mc.cores=12)
names(allBrcaCnaAssocs) <- possibleDrugs
# mclapply() hands back a "try-error" object for a job that failed instead of
# stopping; fail here with the affected drugs rather than obscurely downstream.
failedDrugs <- possibleDrugs[vapply(allBrcaCnaAssocs, inherits, logical(1), what = "try-error")]
if (length(failedDrugs) > 0) {
  stop("doPredict failed for: ", paste(failedDrugs, collapse = ", "), call. = FALSE)
}
```

Find the best association for each drug (these are Wilcoxon rank sum test p-values).


```r
# For each drug, take the gene with the smallest Wilcoxon p-value (column 3 of
# that drug's association matrix) and record its p-value, difference in medians
# (column 4) and group sizes (columns 5 and 6). Each stored value keeps the
# gene name as its element name.
minGenes <- list()
minGenesDiff <- list()
ampNumList <- list()
notAmpNumList <- list()
for (i in seq_along(possibleDrugs)) {
  assocMat <- allBrcaCnaAssocs[[possibleDrugs[i]]]
  # Sort once per drug; order() places NA p-values last.
  bestRow <- order(assocMat[, 3])[1]
  minGenes[[i]] <- assocMat[, 3][bestRow]
  minGenesDiff[[i]] <- assocMat[, 4][bestRow]
  ampNumList[[i]] <- assocMat[, 5][bestRow]
  notAmpNumList[[i]] <- assocMat[, 6][bestRow]
}
names(minGenes) <- possibleDrugs
names(minGenesDiff) <- possibleDrugs
names(ampNumList) <- possibleDrugs
names(notAmpNumList) <- possibleDrugs
```

Print the associations in a positive direction (i.e. amplification predictive of resistance).


```r
# Top p-value per drug, smallest first, for drugs whose top gene has a positive
# difference in medians (amplified minus non-amplified).
print(sort(unlist(minGenes[minGenesDiff > 0])))
```

```
##     Obatoclax.Mesylate.PGAP3        Vinorelbine.LOC728024 
##                 3.038605e-29                 4.436399e-20 
##                CEP.701.MIEN1         Tipifarnib.LOC728024 
##                 5.952652e-20                 3.867692e-18 
##         Epothilone.B.MIR4728               KU.55933.PGAP3 
##                 6.240719e-17                 3.116550e-16 
##               PLX4720.ORMDL3           Z.LLNle.CHO.ERLIN2 
##                 4.334573e-16                 4.465949e-16 
##        Gemcitabine.LOC728024          PF.562271.LOC728024 
##                 6.613063e-16                 1.111935e-15 
##            BAY.61.3606.CCND1               Shikonin.PROSC 
##                 1.801834e-15                 2.183088e-15 
##            Cyclopamine.PROSC             Cytarabine.MIEN1 
##                 2.882756e-15                 4.433241e-15 
##                AZ628.MIR4728                  MG.132.OXR1 
##                 1.706294e-14                 1.722290e-14 
##                X681640.MIEN1             BI.D1870.MIR4737 
##                 3.340065e-14                 3.501819e-14 
##            Embelin.LOC728024           GSK.650394.SUMO1P1 
##                 5.361023e-14                 6.668509e-14 
##              AS601245.STARD3                JNK.9L.ERLIN2 
##                 3.352375e-13                 6.997429e-13 
##           Pyrimethamine.PNMT                AZD7762.MIEN1 
##                 9.330410e-13                 1.230506e-12 
##                AUY922.GPR124             Etoposide.ERLIN2 
##                 1.321776e-12                 1.353113e-12 
##           Doxorubicin.ERLIN2           Bortezomib.POU5F1B 
##                 1.577490e-12                 1.838159e-12 
##             Bexarotene.ZFPM2            NU.7441.LOC653653 
##                 4.827938e-12                 4.931655e-12 
##                CI.1040.IKZF3            Roscovitine.GDPD4 
##                 1.780560e-11                 2.606216e-11 
##              NSC.87877.PROSC                RDEA119.TRPS1 
##                 3.137257e-11                 3.514339e-11 
##             Mitomycin.C.PCK1                 RO.3306.PNMT 
##                 6.527495e-11                 6.975004e-11 
##             CHIR.99021.CDH17               GDC.0449.MIEN1 
##                 9.433678e-11                 1.070058e-10 
##                BX.795.APPBP2             AG.014699.STARD3 
##                 1.602976e-10                 1.761085e-10 
##                  AICAR.PGAP3             Vinblastine.MCM4 
##                 2.037792e-10                 2.522408e-10 
##            SB.216763.FAM91A1              Bosutinib.ERBB2 
##                 2.544441e-10                 2.576029e-10 
##              AP.24534.STARD3         Thapsigargin.SUMO1P1 
##                 3.658541e-10                 4.626154e-10 
## AKT.inhibitor.VIII.LOC728024                  QS11.ZNF217 
##                 7.789714e-10                 8.292796e-10 
##          AZD.2281.SHANK2-AS1                  TW.37.MIEN1 
##                 1.212760e-09                 1.488899e-09 
##             CCT018159.ZNF703              Docetaxel.PGAP3 
##                 1.639920e-09                 1.905954e-09 
##               Bleomycin.PCK1              ZM.447439.MYEOV 
##                 2.105569e-09                 4.293971e-09 
##           NVP.BEZ235.MIR4737             PD.0332991.MIEN1 
##                 4.974007e-09                 5.078702e-09 
##                AZD6244.GSDMB           Cisplatin.EIF4EBP1 
##                 7.129934e-09                 7.923850e-09 
##               Axitinib.MIEN1           Methotrexate.ERBB2 
##                 1.151465e-08                 1.376423e-08 
##               GNF.2.C17orf64             Gefitinib.ORMDL3 
##                 5.313810e-08                 5.666531e-08 
##            Midostaurin.MIEN1                XMD8.85.ADRB3 
##                 5.872991e-08                 6.299181e-08 
##           Parthenolide.KAT6A           CGP.60474.EIF4EBP1 
##                 6.310236e-08                 6.744795e-08 
##           Camptothecin.NTSR1            BMS.754807.ZNF217 
##                 2.860432e-07                 3.818678e-07 
##                    DMOG.FGF4                 IPA.3.ERLIN2 
##                 3.896414e-07                 4.407799e-07 
##               SB590885.ERBB2              Erlotinib.MIEN1 
##                 7.220367e-07                 7.846876e-07 
##             BMS.536924.GDPD4                 PAC.1.ERLIN2 
##                 1.566439e-06                 1.800310e-06 
##                FTI.277.TRPS1    WO2009093972.LOC100129361 
##                 1.937772e-06                 2.343748e-06 
##              FH535.LOC728024           Bryostatin.1.TRPS1 
##                 4.996826e-06                 6.630162e-06 
##          Paclitaxel.EIF4EBP1            PD.0325901.WDYHV1 
##                 7.564669e-06                 1.334315e-05 
##              Rapamycin.ERP27            BMS.509744.CLNS1A 
##                 1.849792e-05                 2.149158e-05 
##              X17.AAG.TAS2R50              Sunitinib.GDPD4 
##                 3.645146e-05                 4.735013e-05 
##                LFM.A13.ACER3              Metformin.SPAG4 
##                 6.134401e-05                 1.017338e-04 
##             Dasatinib.APPBP2            WH.4.023.C1orf111 
##                 1.887158e-04                 1.904943e-04 
##             Pazopanib.ZNF420                ABT.888.KDM2A 
##                 3.985901e-04                 4.268825e-04 
##              GW843682X.PROSC          GSK269962A.MIR101-2 
##                 5.257780e-04                 5.446034e-04 
##             PF.02341066.ETV6              BI.2536.WHSC1L1 
##                 6.313841e-04                 7.406662e-04 
##              NVP.TAE684.PRR4 
##                 1.760088e-03
```

Print the associations in a negative direction, i.e. amplification predictive of drug sensitivity.


```r
# Top p-value per drug, smallest first, for drugs whose top gene has a negative
# difference in medians (amplified minus non-amplified).
print(sort(unlist(minGenes[minGenesDiff < 0])))
```

```
##           CGP.082996.CSMD3              AMG.706.PROSC 
##               1.612064e-17               3.157053e-17 
##           GW.441756.ORAOV1          EHT.1864.MIR378D2 
##               1.619676e-15               2.167591e-15 
##              MK.2206.PGAP3            A.443654.STARD3 
##               3.191264e-15               4.365148e-15 
##          BIRB.0796.SLC26A7            PD.173074.EBAG9 
##               1.677502e-14               2.091675e-14 
##               WZ.1.84.PNMT         PF.4708671.SLC26A7 
##               5.523420e-14               1.424001e-13 
##             Lapatinib.PNMT              AZD6482.PGAP3 
##               2.806035e-13               5.947210e-13 
##  JNK.Inhibitor.VIII.RAD54B            Elesclomol.STK3 
##               9.880550e-12               1.884021e-11 
##               GDC0941.STK3              MS.275.SAMD12 
##               5.731972e-11               6.481744e-11 
##            Nutlin.3a.MIR21          Salubrinal.MIR661 
##               1.070968e-10               2.353915e-10 
##            Sorafenib.RNFT1             CCT007093.CHAD 
##               5.235713e-10               3.708401e-09 
##       BMS.708163.LOC728024               VX.702.PPM1D 
##               4.138621e-09               6.243912e-09 
##                  CMK.CSMD3           JW.7.52.1.STARD3 
##               6.498673e-09               1.250637e-08 
##             BIBW2992.ACER3            SL.0101.1.GRHL2 
##               1.516977e-08               4.910921e-07 
##               AZD8055.STK3           A.770041.PPP1R1B 
##               5.326922e-07               6.314740e-07 
##              AZD.0530.SEZ6 S.Trityl.L.cysteine.FBXO32 
##               8.060248e-07               8.116948e-07 
##      Lenalidomide.ZFAT-AS1      Temsirolimus.MIR378D2 
##               9.756524e-07               2.748206e-06 
##       OSI.906.RP11-434C1.1            ABT.263.MIR3662 
##               3.512938e-06               5.466745e-06 
##         Bicalutamide.ACSF2            Nilotinib.RDH10 
##               1.338896e-05               1.617286e-05 
##    Vorinostat.LOC100129361        VX.680.LOC100129361 
##               2.346588e-05               2.357488e-05 
##              ATRA.FLJ46284            Imatinib.ZNF274 
##               2.738357e-05               4.973905e-05 
##         KIN001.135.NEUROD2          JNJ.26854165.STK3 
##               2.147513e-04               5.170602e-04 
##            PHA.665752.PTK7 
##               6.168052e-04
```

Make a table of the top association for every drug, with drug, gene, chromosome, chromosome location, P-value, effect size, number of amplified samples, number of not amplified samples.


```r
# Name of the top gene for each drug: each minGenes element is a length-one
# value named by its gene. This overwrites the earlier `genes` vector.
genes <- sapply(minGenes, names)
library(org.Hs.eg.db)
# List the key types the annotation database can be queried by.
keytypes(org.Hs.eg.db)
```

```
##  [1] "ENTREZID"     "PFAM"         "IPI"          "PROSITE"     
##  [5] "ACCNUM"       "ALIAS"        "CHR"          "CHRLOC"      
##  [9] "CHRLOCEND"    "ENZYME"       "MAP"          "PATH"        
## [13] "PMID"         "REFSEQ"       "SYMBOL"       "UNIGENE"     
## [17] "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS" "GENENAME"    
## [21] "UNIPROT"      "GO"           "EVIDENCE"     "ONTOLOGY"    
## [25] "GOALL"        "EVIDENCEALL"  "ONTOLOGYALL"  "OMIM"        
## [29] "UCSCKG"
```

```r
# List the columns that can be retrieved from the annotation database.
columns(org.Hs.eg.db)
```

```
##  [1] "ENTREZID"     "PFAM"         "IPI"          "PROSITE"     
##  [5] "ACCNUM"       "ALIAS"        "CHR"          "CHRLOC"      
##  [9] "CHRLOCEND"    "ENZYME"       "MAP"          "PATH"        
## [13] "PMID"         "REFSEQ"       "SYMBOL"       "UNIGENE"     
## [17] "ENSEMBL"      "ENSEMBLPROT"  "ENSEMBLTRANS" "GENENAME"    
## [21] "UNIPROT"      "GO"           "EVIDENCE"     "ONTOLOGY"    
## [25] "GOALL"        "EVIDENCEALL"  "ONTOLOGYALL"  "OMIM"        
## [29] "UCSCKG"
```

```r
# Build a one-row-per-drug table of the strongest CNV association and write it to disk.
# Look up chromosome and chromosomal location for each top gene symbol.
resLocs <- select(org.Hs.eg.db, keys = genes, columns = c("SYMBOL", "CHR", "CHRLOC", "CHRLOCEND"), keytype = "SYMBOL")
# The lookup can return several rows per symbol; keep only the first row for each symbol.
resNodups <- resLocs[!duplicated(resLocs[, "SYMBOL"]), ]
rownames(resNodups) <- resNodups[, 1]
# Columns 2 and 3 of the lookup result (presumably CHR and CHRLOC, in the order requested above).
chr <- resNodups[genes, 2]
chrLoc <- resNodups[genes, 3]
df <- data.frame(drug = names(minGenes), genes = genes, chr = chr, chrLoc = chrLoc, pVals = unlist(minGenes), beta = unlist(minGenesDiff), numAmp = unlist(ampNumList), numNotAmp = unlist(notAmpNumList))
# Most significant associations first.
dfOrd <- df[order(df[, "pVals"]), ]
# write.table() cannot create missing directories and fails with "cannot open the connection",
# so make sure the output directory exists before writing.
tablesDir <- paste0(theRootDir, "tables/")
dir.create(tablesDir, showWarnings = FALSE, recursive = TRUE)
write.table(dfOrd, file = paste0(tablesDir, "top_cnv_drug_assocs_brca.txt"), row.names = FALSE)
```

```
## Error in file(file, ifelse(append, "a", "w")): cannot open the connection
```

Make the plot of Effect-size / P-values for Vinorelbine in the ERLIN2 locus.


```r
# Sort the Vinorelbine association results by column 3 and keep the first 40 rows.
vbineAssocsSorted <- allBrcaCnaAssocs[["Vinorelbine"]][order(allBrcaCnaAssocs[["Vinorelbine"]][, 3]), ]
topAssocsVbine <- vbineAssocsSorted[seq_len(40), ]
# Genomic coordinates of those genes.
resLocs_vBine <- theDf_filt[rownames(topAssocsVbine), c("gene_sym", "seqnames", "start", "end")]
# Keep only the genes on chromosome 8, i.e. the region of LOC728024 where the strongest association lies.
# Genes without a chromosome annotation compare as NA and are removed by na.omit().
onChr8 <- resLocs_vBine[, "seqnames"] == "chr8"
topLocusGenes_vBine <- na.omit(rownames(resLocs_vBine)[onChr8])
resLocs_vBine_2 <- resLocs_vBine[topLocusGenes_vBine, ]
save(resLocs_vBine_2, file = paste0(theRootDir, "dataOut/resLocs_vBine_2.RData"))
```

Overlay the p-values on the plot above using ggplot2.


```r
# Plot, for each gene in the chromosome 8 locus, its association value (y) against its genomic
# midpoint (x), coloured by -log10 p-value, and save the figure as an SVG.
midVec <- ((resLocs_vBine[topLocusGenes_vBine, ]$end + resLocs_vBine[topLocusGenes_vBine, ]$start) / 2)
# NOTE(review): columns 2 and 1 are used here as effect size and p-value, whereas the top hits
# were ranked on column 3 — confirm that these are the intended columns.
yVec <- topAssocsVbine[topLocusGenes_vBine, 2]
textVec <- topLocusGenes_vBine
pVec <- topAssocsVbine[topLocusGenes_vBine, 1]
# x is converted to megabases; the "Drug" column actually holds the gene labels drawn next to each point.
dat <- data.frame(x = midVec / 1000000, y = yVec, Drug = textVec, pVal = -log10(pVec))
svg(paste0(theRootDir, "figures/Erlin2Loc_points.svg"), width = 7, height = 5)
erlin2LocusPlot <- ggplot(data = dat, aes(x = x, y = y)) +
  theme_bw() +
  geom_point(aes(color = pVal), size = I(3)) +
  geom_text(aes(label = Drug), vjust = -.5, hjust = -.24, size = 2.5, angle = 15) +
  ylab("Vinorelbine Effect Size for ERLIN2 Amplified vs Not Amplified") +
  xlab("Chromosome 8 Location (megabases)") +
  scale_color_continuous(low = "steelblue4", high = "tomato2", name = "-Log10 P-value") +
  theme(legend.position = c(.9, .8))
# A ggplot object is only drawn when printed. Auto-printing happens at top level only, so print
# explicitly to make sure the SVG is not empty when this code is run via source() or in a function.
print(erlin2LocusPlot)
dev.off()
```

```
## png 
##   2
```

Create a scatter plot of the predicted vinorelbine sensitivity Vs ERLIN2 amplification.


```r
# Scatter plot of predicted Vinorelbine values against ERLIN2 copy number in breast cancer patients.
predOnAll_residualized <- allDrugPredictions_mat["Vinorelbine", ]
tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps]
oLapSamps <- names(predOnAll_residualized)[names(predOnAll_residualized) %in% colnames(tumSampCnvMat)]

# Split each prediction sample name on "-" once: field 3 is used as the patient id, field 4 as the sample type.
predBarcodeParts <- strsplit(names(predOnAll_residualized), "-")
patPred <- sapply(predBarcodeParts, function(a) a[3])
pat01A <- which(sapply(predBarcodeParts, function(a) a[4]) == "01A")

# Re-key the CNV matrix columns by patient id so both data sets share an identifier.
patCnv <- sapply(strsplit(colnames(tumSampCnvMat), "-"), function(a) a[3])
colnames(tumSampCnvMat) <- patCnv

# Patients with CNV data that are both "01A" samples and labelled BRCA in the predictions.
isBrca <- cancerTypesVec == "BRCA"
relevantPatients <- patCnv[patCnv %in% intersect(patPred[pat01A], patPred[isBrca])]

# Predictions for the "01A" BRCA samples, renamed by patient id and restricted to the patients above.
brca01aPreds <- predOnAll_residualized[intersect(pat01A, which(isBrca))]
brca01aPreds_patNames <- sapply(strsplit(names(brca01aPreds), "-"), function(a) a[3])
names(brca01aPreds) <- brca01aPreds_patNames
brca_vbine_preds_with_cnvs <- brca01aPreds[relevantPatients]

svg(paste0(theRootDir, "figures/erlin2vsVinorelbine.svg"), width = 4, height = 4)
plot(tumSampCnvMat["ERLIN2", relevantPatients], brca_vbine_preds_with_cnvs, pch = 20, col = "#00000044", xlab = "ERLIN2 CNV", ylab = "Predicted Vinorelbine IC50")
dev.off()
```

```
## png 
##   2
```

Create a boxplot stratifying by CNV for vinorelbine and ERLIN2


```r
# Boxplot of predicted Vinorelbine values, stratified by binned ERLIN2 copy number.
splitOnCnvNum <- tumSampCnvMat["ERLIN2", relevantPatients]
# Recode copy number into bins 0..3: below 1, [1, 2), [2, 3), and 3 or above.
# Assigning through [] keeps the patient names on the vector.
splitOnCnvNum[] <- findInterval(splitOnCnvNum, c(1, 2, 3))
# Split on a factor with all four levels fixed, so that an empty bin still yields a (empty) group.
# Otherwise the four labels below would not match the number of groups and names<- would fail.
a <- split(brca_vbine_preds_with_cnvs, factor(splitOnCnvNum, levels = 0:3))
names(a) <- c("<1", "1-2", "2-3", ">3")
svg(paste0(theRootDir, "figures/erlin2VsVinorelbine_bplot.svg"), width = 3, height = 4)
boxplot(a, col = c("#eff3ff", "#bdd7e7", "#6baed6", "#2171b5"), ylab = "Predicted Vinorelbine Sensitivity", xlab = "Normalized Copy Number", pch = 20, cex.axis = .75, outcol = "#00000033")
dev.off()
```

```
## png 
##   2
```

# Do the CNV analysis on the drug predictions when models were applied to only breast cancer data instead of all of TCGA.


```r
# Repeat the CNV association analysis using the drug predictions stored in brcaDrugPredsAll.RData.
load(file = paste0(theRootDir, "dataOut/brcaDrugPredsAll.RData")) # allSizesOut contains the breast cancer specific predictions (correlate with cancer type )
names(allSizesOut) <- possibleDrugs
# Stack the per-drug predictions into a single matrix with one row per drug.
brcaDrugMat <- do.call(rbind, allSizesOut)
# Sample names use "." as separator here; switch to "-" as is done for the other prediction matrix in this file.
colnames(brcaDrugMat) <- gsub(".", "-", colnames(brcaDrugMat), fixed = TRUE)
# Run doPredict once per drug. The index range is derived from possibleDrugs rather than a
# hard-coded count so that it always agrees with the names assigned below.
allBrcaCnaAssocs_onlyBrcaPreds <- mclapply(seq_along(possibleDrugs), doPredict, brcaDrugMat, mc.cores = 12)
names(allBrcaCnaAssocs_onlyBrcaPreds) <- possibleDrugs
```

Find the best association for each drug (these are Wilcoxon rank-sum test p-values).


```r
# For every drug, pick the association with the smallest value in column 3 and record four values
# from that row: column 3 (stored in minGenes), column 4 (minGenesDiff), column 5 (ampNumList)
# and column 6 (notAmpNumList). Each stored value keeps the row name of the top association.
nDrugs <- length(possibleDrugs)
minGenes <- vector("list", nDrugs)
minGenesDiff <- vector("list", nDrugs)
ampNumList <- vector("list", nDrugs)
notAmpNumList <- vector("list", nDrugs)
for (i in seq_along(possibleDrugs)) {
  # Sort this drug's results once and reuse the sorted result for all four values.
  drugAssocs <- allBrcaCnaAssocs_onlyBrcaPreds[[possibleDrugs[i]]]
  drugAssocsSorted <- drugAssocs[order(drugAssocs[, 3]), ]
  minGenes[[i]] <- drugAssocsSorted[, 3][1]
  minGenesDiff[[i]] <- drugAssocsSorted[, 4][1]
  ampNumList[[i]] <- drugAssocsSorted[, 5][1]
  notAmpNumList[[i]] <- drugAssocsSorted[, 6][1]
}
names(minGenes) <- possibleDrugs
names(minGenesDiff) <- possibleDrugs
names(ampNumList) <- possibleDrugs
names(notAmpNumList) <- possibleDrugs
```

Print the associations in a positive direction (i.e. amplification predictive of resistance).


```r
# Top p-values, in increasing order, for the drugs whose top association has a positive minGenesDiff value.
print(sort(unlist(minGenes[unlist(minGenesDiff) > 0])))
```

```
##      Roscovitine.MIR4728         Bexarotene.GRHL2      PF.562271.LOC728024 
##             7.580255e-12             3.219609e-10             3.708790e-10 
##           LFM.A13.ERLIN2       Tipifarnib.PPP1R1B               AZ628.NGFR 
##             6.731744e-10             8.475452e-10             2.737039e-09 
##           AP.24534.PGAP3         Salubrinal.CDK12         GDC.0449.MIR4728 
##             9.659584e-09             1.448147e-08             1.464785e-08 
##         AS601245.PPP1R1B AKT.inhibitor.VIII.PROSC        CGP.60474.C8orf87 
##             1.921149e-08             3.070745e-08             3.367411e-08 
##         BMS.754807.ZMAT4      Vinblastine.MIR4661       Bortezomib.POU5F1B 
##             4.148035e-08             5.124794e-08             5.153286e-08 
##      Docetaxel.LINC00534         Cyclopamine.ANO1         ZM.447439.MRGPRF 
##             5.685914e-08             6.783259e-08             7.533856e-08 
##             AZD6244.MED1         PLX4720.SNORD124 Obatoclax.Mesylate.IKZF3 
##             1.047054e-07             1.579737e-07             2.731635e-07 
##         Cisplatin.RNF170            CEP.701.NR1D1         JNK.9L.LOC728024 
##             2.832690e-07             3.064566e-07             3.441460e-07 
##          Bosutinib.ERBB2           ABT.888.STARD3           XMD8.85.GOLGA7 
##             3.614828e-07             3.734773e-07             3.775054e-07 
##        Cytarabine.MIR486              DMOG.SMIM19         Erlotinib.ERLIN2 
##             4.430264e-07             4.443065e-07             4.462916e-07 
##              BX.795.ANK1        NVP.TAE684.MIR486           Etoposide.FGF4 
##             5.212073e-07             5.863721e-07             5.949510e-07 
##         AZD.2281.MIR4728          AZD6482.MIR4729             CI.1040.FGF3 
##             6.323958e-07             7.075624e-07             1.003976e-06 
##      Bryostatin.1.ERLIN2            RDEA119.GRHL2        CCT018159.C8orf86 
##             1.105213e-06             1.496259e-06             1.711455e-06 
##             Imatinib.MYC          Gefitinib.GSDMB            Rapamycin.MYC 
##             1.858662e-06             1.914926e-06             2.128805e-06 
##         Dasatinib.MIR486      GSK269962A.ZFAT-AS1          SB.216763.LRP12 
##             2.554019e-06             2.871777e-06             2.887660e-06 
##       CHIR.99021.C8orf46      Parthenolide.NKX6-3             AUY922.PROSC 
##             2.946100e-06             3.200228e-06             3.486222e-06 
##            CCT007093.MYC         KIN001.135.TRPS1             MG.132.ZFPM2 
##             3.759236e-06             4.304591e-06             6.349284e-06 
##          AG.014699.GINS4 Z.LLNle.CHO.LOC100130964         Elesclomol.ERBB2 
##             7.872371e-06             1.085015e-05             1.154350e-05 
##          A.770041.NKX6-3      Thapsigargin.NKX6-3       Gemcitabine.RNF170 
##             1.363249e-05             1.472352e-05             1.531667e-05 
##          NSC.87877.MYEOV         PD.0325901.PCGF2          NU.7441.ALDH3B2 
##             2.076468e-05             2.124136e-05             2.619948e-05 
##           TW.37.ZFAT-AS1       NVP.BEZ235.SLC16A6         Sunitinib.NKX6-3 
##             2.829778e-05             2.881948e-05             3.850261e-05 
##          Pazopanib.MED30          Doxorubicin.CA4         BMS.536924.SOX13 
##             4.326583e-05             4.614227e-05             4.947626e-05 
##      Midostaurin.SLC16A6       PF.02341066.MIR486         Bicalutamide.MYB 
##             5.194957e-05             6.363689e-05             6.790317e-05 
##        GDC0941.LOC440461           SB590885.GSDMB         BMS.509744.ZMAT4 
##             7.010826e-05             7.569821e-05             7.783580e-05 
##          WH.4.023.NKX6-3    Methotrexate.B4GALNT2     Camptothecin.UNC93B1 
##             7.983603e-05             8.077737e-05             9.572956e-05 
##         Mitomycin.C.CTSZ          AZD7762.ALDH3B2            VX.680.NKX6-3 
##             9.952987e-05             1.035293e-04             1.086255e-04 
##           FTI.277.MCMDC2       BAY.61.3606.PPFIA1       GNF.2.LOC100130964 
##             1.197050e-04             1.199934e-04             1.229951e-04 
##       Epothilone.B.PROSC            OSI.906.ZMAT4            Nutlin.3a.MYC 
##             1.311228e-04             1.630970e-04             1.632368e-04 
##         PHA.665752.SFRP1            BI.2536.ZMAT4        Paclitaxel.ERLIN2 
##             1.696289e-04             1.736782e-04             1.756232e-04 
##       Pyrimethamine.PNMT      Temsirolimus.ZNF420             X17.AAG.BYSL 
##             1.801079e-04             2.172288e-04             2.237513e-04 
##          JW.7.52.1.SOX13            Embelin.BAALC         Bleomycin.UNC13D 
##             2.390153e-04             2.407707e-04             2.607394e-04 
##              FH535.P4HA3           AZD8055.ZNF420          X681640.UNC93B1 
##             2.895547e-04             3.583911e-04             3.788457e-04 
##          PD.0332991.MED1           Shikonin.TRPS1         GSK.650394.ERBB2 
##             3.922937e-04             4.389813e-04             4.934543e-04 
##       Vinorelbine.MIR298         WO2009093972.MYB           GW843682X.STAR 
##             4.952036e-04             7.714904e-04             8.381963e-04 
##           MK.2206.MIR9-1            QS11.C17orf82       JNJ.26854165.TACC1 
##             9.393943e-04             1.049329e-03             1.060024e-03 
##          Nilotinib.MEF2D        Metformin.MIR1204        Axitinib.ZFAT-AS1 
##             1.150354e-03             3.508275e-03             3.920953e-03 
##             PAC.1.UNC13D 
##             5.548849e-03
```

Print the associations in a negative direction, i.e. amplification predictive of drug sensitivity.


```r
# Top p-values, in increasing order, for the drugs whose top association has a negative minGenesDiff value.
print(sort(unlist(minGenes[unlist(minGenesDiff) < 0])))
```

```
##              A.443654.NEUROD2       PF.4708671.LOC101241902 
##                  1.024578e-11                  1.157782e-09 
##               Lapatinib.PGAP3     JNK.Inhibitor.VIII.ORAOV1 
##                  1.733334e-08                  2.806220e-08 
##              GW.441756.ORAOV1                 WZ.1.84.PGAP3 
##                  6.468165e-08                  2.167807e-07 
##                AZD.0530.PGAP3        BIRB.0796.LOC100130964 
##                  2.323441e-06                  3.381639e-06 
##                AMG.706.C8orf4                 EHT.1864.ANK1 
##                  1.217321e-05                  1.222739e-05 
##              BIBW2992.ALDH3B2              PD.173074.SBSPON 
##                  1.482095e-05                  1.628961e-05 
##              BMS.708163.TRIQK                 ABT.263.ADRB3 
##                  1.705820e-05                  2.564219e-05 
##                      CMK.PVT1          Lenalidomide.RUNX1T1 
##                  5.549117e-05                  6.770081e-05 
##             CGP.082996.IQGAP3                Sorafenib.LRP5 
##                  2.086722e-04                  2.218286e-04 
##              MS.275.LINC00662              Vorinostat.TAOK1 
##                  2.632803e-04                  2.810888e-04 
##                 BI.D1870.URI1                  KU.55933.MYC 
##                  3.885451e-04                  4.817703e-04 
##                   ATRA.MRGPRF                 RO.3306.CCNE1 
##                  5.257386e-04                  5.318454e-04 
##                  VX.702.PPM1D                 IPA.3.DCAF4L2 
##                  6.826387e-04                  8.598105e-04 
##                 SL.0101.1.MYC                    AICAR.NOS2 
##                  1.204003e-03                  1.889377e-03 
## S.Trityl.L.cysteine.LOC284395 
##                  2.334671e-03
```

```r
# What do the correlations for these drug predictions actually look like across the dataset? They are quite highly correlated.
# Restrict the all-TCGA predictions to the samples present in the breast-cancer-only predictions.
allDrugPredictions_mat_brca <- allDrugPredictions_mat[, colnames(brcaDrugMat)]
# Rows are paired by position.
# NOTE(review): this assumes both matrices list the drugs in the same row order — confirm.
drugRows <- seq_len(nrow(allDrugPredictions_mat_brca))
# Spearman correlation between the two sets of predictions, one value per drug.
theCors <- vapply(drugRows, function(i) {
  cor(allDrugPredictions_mat_brca[i, ], brcaDrugMat[i, ], method = "spearman")
}, numeric(1))
# Matching p-value from the Spearman correlation test, one value per drug.
theCorPs <- vapply(drugRows, function(i) {
  cor.test(allDrugPredictions_mat_brca[i, ], brcaDrugMat[i, ], method = "spearman")$p.value
}, numeric(1))
names(theCors) <- rownames(allDrugPredictions_mat_brca)
names(theCorPs) <- rownames(allDrugPredictions_mat_brca)
```

The median correlations are high and highly significant....


```r
# Median of the per-drug Spearman correlations.
print(median(theCors))
```

```
## [1] 0.6811614
```

```r
# Median of the per-drug correlation p-values; a printed 0 presumably reflects values too small to be represented rather than exact zeros.
print(median(theCorPs))
```

```
## [1] 0
```

Create a histogram of these correlations


```r
# Histogram of the per-drug Spearman correlations, saved as an SVG.
histFile <- paste0(theRootDir, "figures/allOrBcHist.svg")
svg(histFile, width = 4, height = 4)
hist(theCors, breaks = 20, col = "black", main = "", xlab = "Spearman Correlation", las = 1, cex.axis = .75)
dev.off()
```

```
## png 
##   2
```

Session Info


```r
# Record the R version and the attached/loaded package versions used for this run.
print(sessionInfo())
```

```
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.3 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggplot2_2.2.1                           
##  [2] org.Hs.eg.db_2.14.0                     
##  [3] RSQLite_1.0.0                           
##  [4] DBI_0.3.1                               
##  [5] XVector_0.4.0                           
##  [6] TxDb.Hsapiens.UCSC.hg19.knownGene_2.14.0
##  [7] GenomicFeatures_1.16.3                  
##  [8] AnnotationDbi_1.26.1                    
##  [9] Biobase_2.24.0                          
## [10] GenomicRanges_1.16.4                    
## [11] GenomeInfoDb_1.0.2                      
## [12] IRanges_1.22.10                         
## [13] BiocGenerics_0.10.0                     
## [14] pRRophetic_0.5                          
## [15] knitr_1.12.3                            
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.9             lattice_0.20-33        
##  [3] corpcor_1.6.8           Rsamtools_1.16.1       
##  [5] Biostrings_2.32.1       assertthat_0.1         
##  [7] digest_0.6.8            foreach_1.4.3          
##  [9] ridge_2.1-3             plyr_1.8.4             
## [11] BatchJobs_1.6           MatrixModels_0.4-1     
## [13] stats4_3.2.2            evaluate_0.8.3         
## [15] sva_3.10.0              zlibbioc_1.10.0        
## [17] lazyeval_0.2.0          minqa_1.2.4            
## [19] annotate_1.42.1         SparseM_1.7            
## [21] car_2.1-0               nloptr_1.0.4           
## [23] Matrix_1.2-2            checkmate_1.6.2        
## [25] preprocessCore_1.26.1   labeling_0.3           
## [27] splines_3.2.2           lme4_1.1-10            
## [29] BiocParallel_0.6.1      stringr_1.0.0          
## [31] munsell_0.4.3           RCurl_1.95-4.7         
## [33] biomaRt_2.20.0          sendmailR_1.2-1        
## [35] rtracklayer_1.24.2      base64enc_0.1-3        
## [37] mgcv_1.8-7              BBmisc_1.9             
## [39] nnet_7.3-11             fail_1.3               
## [41] tibble_1.2-15           codetools_0.2-14       
## [43] XML_3.98-1.3            GenomicAlignments_1.0.6
## [45] MASS_7.3-44             bitops_1.0-6           
## [47] grid_3.2.2              nlme_3.1-122           
## [49] xtable_1.7-4            gtable_0.2.0           
## [51] magrittr_1.5            scales_0.4.1           
## [53] stringi_1.0-1           genefilter_1.46.1      
## [55] brew_1.0-6              iterators_1.0.8        
## [57] tools_3.2.2             BSgenome_1.32.0        
## [59] markdown_0.7.7          pbkrtest_0.4-2         
## [61] survival_2.40-1         colorspace_1.2-6       
## [63] quantreg_5.19
```

