In this script we investigate the association between drug sensitivity and CNV status in the TCGA breast cancer samples. Set the root directory to the location of the data. This must be modified for your own use (based on the location of the files).
theRootDir <- "/mnt/data_scratch/finalData/"
1st try this on breast cancer samples, predicting from allSolidTumors
library("pRRophetic")
Load BRCA CNV and IC50. The CNV data was created in “map_cnvs_to_genes.R”, the expression data in “batch_correct_tcga_data.R” and the drug predictions in “getPredsOnAllTCGA_batchCorrData.R”
# Restore the CNV, expression and drug-prediction objects; the trailing comments
# list the objects each .RData file is expected to provide.
load(paste0(theRootDir, "dataIn/tcga_cnv_subtracted/cnvsMappedToGenes/BRCA.RData")) # theCnvQuantVecList_mat, tumorSamps
load(file = paste0(theRootDir, "/dataIn/tenRuvNewStandardApproach.RData")) # cancerTypesVec, tenRuvNewStandardApproach
load(paste0(theRootDir, "dataOut/allDrugPredictions_mat.RData")) # allDrugPredictions_mat, cancerTypesVec
# Replace every literal "." in the prediction sample names with "-".
colnames(allDrugPredictions_mat) <- gsub(
  ".", "-", colnames(allDrugPredictions_mat),
  fixed = TRUE
)
Load the breast cancer CNV data and find samples for which we also have drug sensitivity predictions.
# Cancer-type abbreviations (presumably the TCGA study codes).
# NOTE(review): diseaseAbbrvs is not referenced again in the visible part of this script — confirm it is still needed.
diseaseAbbrvs <- c("ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "DLBC", "GBM", "HNSC", "KICH", "KIRC", "KIRP", "LAML", "LGG", "LIHC", "LUAD", "LUSC", "MESO", "OV", "PAAD", "PCPG", "PRAD", "READ", "SARC", "SKCM", "STAD", "TGCT", "THCA", "THYM", "UCEC", "UCS", "UVM")
# Keep only the CNV columns named in tumorSamps.
tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps]
# Prediction sample names that occur verbatim among the CNV sample names.
oLapSamps <- colnames(allDrugPredictions_mat)[colnames(allDrugPredictions_mat) %in% colnames(tumSampCnvMat)] # these do not match
Sample names do not match between the CNV and expression data, thus I need to match the IDs of the participants.
# Split each sample name on "-" once; field 3 is used as the patient ID and
# field 4 as the sample code ("01A" = tumor sample, first replicate).
predNameFields <- strsplit(colnames(allDrugPredictions_mat), "-")
patPred <- sapply(predNameFields, `[`, 3) # patient ID for every drug-prediction column
pat01A <- which(sapply(predNameFields, `[`, 4) == "01A") # column indices of the "01A" samples
patCnv <- sapply(strsplit(colnames(tumSampCnvMat), "-"), `[`, 3) # patient ID for every CNV column
colnames(tumSampCnvMat) <- patCnv
# 01A breast cancer patients for whom we have both CNV and drug predictions
patientsWithPreds <- intersect(patPred[pat01A], patPred[cancerTypesVec == "BRCA"])
relevantPatients <- patCnv[patCnv %in% patientsWithPreds]
Get the predictions for Lapatinib for only those breast cancer patients who are “01A” and have matched CNV data.
# Columns that are both "01A" samples and labelled BRCA in cancerTypesVec.
brca01aCols <- intersect(pat01A, which(cancerTypesVec == "BRCA"))
brca01aPreds <- allDrugPredictions_mat[, brca01aCols]
# Re-key the columns by patient ID (third "-"-separated field of the sample name).
brca01aPreds_patNames <- sapply(strsplit(colnames(brca01aPreds), "-"), `[`, 3)
colnames(brca01aPreds) <- brca01aPreds_patNames
# Lapatinib predictions, ordered like relevantPatients.
brca_preds_with_cnvs <- brca01aPreds["Lapatinib", relevantPatients]
Now run t-tests and wilcoxon rank sum tests for every amplification, can the ERBB2 amplification be identified from these data?
# For every gene (row of tumSampCnvMat), compare predicted Lapatinib response
# between samples with CNV > 1 ("amp") and CNV < 1 ("notAmp"). Samples with a
# value of exactly 1 belong to neither group. A gene is only tested when more
# than 10 samples are amplified; untested genes keep NA in every p-value and
# effect-size vector.
nGenes <- nrow(tumSampCnvMat)
pValsOut <- rep(NA_real_, nGenes)     # Spearman p-value against continuous CNV
pValsLm <- rep(NA_real_, nGenes)      # lm p-value for the gene, adjusting for continuous ERBB2 CNV
tTestAmp <- rep(NA_real_, nGenes)     # t-test p-value, amp vs notAmp
tTestDir <- rep(NA_real_, nGenes)     # mean(amp) - mean(notAmp)
pCondOnErbb2 <- rep(NA_real_, nGenes) # lm p-value for amp status, adjusting for ERBB2 amp status
wilcoxP <- rep(NA_real_, nGenes)      # Wilcoxon rank sum p-value, amp vs notAmp
wilcoxDir <- rep(NA_real_, nGenes)    # median(amp) - median(notAmp)
nAmp <- numeric(nGenes)
nNotAmp <- numeric(nGenes)

# Loop invariants: ERBB2 copy number and the predictions keyed by patient.
erbb2Cnv <- tumSampCnvMat["ERBB2", relevantPatients]
predsByPatient <- brca_preds_with_cnvs[relevantPatients]

op <- options(warn = (-1)) # suppress warnings while the tests run
for (i in seq_len(nGenes)) {
  geneCnv <- tumSampCnvMat[i, relevantPatients]
  amp <- which(geneCnv > 1)
  notAmp <- which(geneCnv < 1)
  nAmp[i] <- length(amp)
  nNotAmp[i] <- length(notAmp)
  if (length(amp) > 10) {
    ampPreds <- predsByPatient[amp]
    notAmpPreds <- predsByPatient[notAmp]
    pValsOut[i] <- cor.test(geneCnv, brca_preds_with_cnvs, method = "spearman")$p.value # p-value against continuous CNV. This is likely nonsense.
    pValsLm[i] <- coef(summary(lm(brca_preds_with_cnvs ~ geneCnv + erbb2Cnv)))[2, 4]
    tTestAmp[i] <- t.test(ampPreds, notAmpPreds)$p.value # p-value for t-test between cnved and not cnved
    tTestDir[i] <- mean(ampPreds) - mean(notAmpPreds)
    wilcoxP[i] <- wilcox.test(ampPreds, notAmpPreds)$p.value
    wilcoxDir[i] <- median(ampPreds) - median(notAmpPreds)
    # Conditional model: response ~ gene amp status + ERBB2 amp status, with the
    # samples stacked as amp first, then notAmp.
    resp <- c(ampPreds, notAmpPreds)
    explor <- c(rep("amp", length(amp)), rep("notAmp", length(notAmp)))
    erbb2NotAmp <- which(erbb2Cnv[c(amp, notAmp)] < 1)
    erbb2Amp <- rep("ampE", length(explor))
    erbb2Amp[erbb2NotAmp] <- "notAmpE"
    pCondOnErbb2[i] <- coef(summary(lm(resp ~ explor + erbb2Amp)))[2, 4]
  }
}
options(op) # restore the previous warning level so later code reports warnings again
Assign names and calculate q values…
# Label the per-gene results with the gene names (row names of the CNV matrix).
cnvGeneNames <- rownames(tumSampCnvMat)
names(pValsOut) <- cnvGeneNames
names(pValsLm) <- cnvGeneNames
# Benjamini-Hochberg adjustment; done before tTestAmp is named, so qTtest carries no names.
qTtest <- p.adjust(tTestAmp, method = "BH")
names(tTestAmp) <- cnvGeneNames
names(tTestDir) <- cnvGeneNames
Create a table of some of the top results, we would expect ERBB2 to be near the top of this list.
# Indices of the 40 smallest t-test p-values, computed once and reused for every column.
top40 <- order(tTestAmp)[1:40]
resMat <- cbind(
  tTestAmp[top40],
  tTestDir[top40],
  qTtest[top40],
  nAmp[top40],
  nNotAmp[top40]
)
colnames(resMat) <- c("PvalTests", "betaTtest", "qvalTtest", "numAmp", "numNotAmp")
print(resMat)
## PvalTests betaTtest qvalTtest numAmp numNotAmp
## PNMT 2.131717e-13 -0.1855012 2.264368e-10 112 949
## TCAP 2.131717e-13 -0.1855012 2.264368e-10 112 949
## PGAP3 2.447084e-13 -0.1849778 2.264368e-10 112 947
## MIEN1 5.188451e-13 -0.1806118 3.600785e-10 111 950
## MIR4728 6.966988e-13 -0.1785517 3.653561e-10 112 950
## GRB7 7.896745e-13 -0.1806983 3.653561e-10 108 948
## PPP1R1B 1.833179e-12 -0.1844752 7.269863e-10 106 951
## STARD3 2.450930e-12 -0.1800699 8.504727e-10 107 947
## NEUROD2 8.477655e-12 -0.1804107 2.614886e-09 102 958
## ERBB2 4.293287e-11 -0.1901659 1.191816e-08 86 945
## ZPBP2 2.184755e-10 -0.1748457 5.513526e-08 94 962
## GSDMB 8.083387e-10 -0.1703977 1.869957e-07 91 967
## ORMDL3 9.255421e-10 -0.1728747 1.976388e-07 90 968
## IKZF3 1.045248e-09 -0.1789772 2.072577e-07 82 943
## LRRC3C 3.556775e-09 -0.1709207 6.390570e-07 86 974
## GSDMA 3.683326e-09 -0.1735671 6.390570e-07 83 974
## PSMD3 1.487876e-08 -0.1759700 2.429615e-06 77 978
## CSF3 2.350264e-08 -0.1688332 3.624630e-06 79 981
## SNORD124 7.642309e-08 -0.1656246 1.116582e-05 77 985
## CDK12 4.849501e-07 -0.1425511 6.731107e-05 76 955
## THRA 5.117412e-07 -0.1708247 6.764731e-05 64 986
## MED24 1.353596e-06 -0.1693292 1.707992e-04 62 972
## NR1D1 2.687171e-06 -0.1639679 3.243298e-04 65 993
## NXPH3 6.640119e-06 -0.1872953 7.680405e-04 44 1014
## MED1 8.178420e-06 -0.1286938 9.081317e-04 72 974
## RAPGEFL1 9.872701e-06 -0.1566107 1.034166e-03 54 1000
## FLJ40194 1.032013e-05 -0.1730648 1.034166e-03 48 1013
## NGFR 1.043107e-05 -0.1870641 1.034166e-03 43 1012
## PHOSPHO1 1.447507e-05 -0.1735327 1.350182e-03 47 1013
## EPN3 1.459130e-05 -0.1799692 1.350182e-03 42 1018
## CASC3 1.582769e-05 -0.1511061 1.417344e-03 57 999
## ABI3 1.897444e-05 -0.1746711 1.605102e-03 46 1011
## MSL1 1.908082e-05 -0.1443380 1.605102e-03 60 997
## GNGT2 2.024867e-05 -0.1739508 1.653245e-03 46 1015
## WIPF2 2.672387e-05 -0.1525884 2.119585e-03 42 1008
## HELZ 2.797258e-05 -0.1688949 2.156997e-03 34 1009
## SLC35B1 3.018034e-05 -0.1787134 2.259502e-03 41 1017
## ACSF2 3.114199e-05 -0.1777976 2.259502e-03 44 1010
## PHB 3.174373e-05 -0.1653610 2.259502e-03 47 1007
## ANKRD40 3.271864e-05 -0.1654006 2.270674e-03 43 1015
genes <- rownames(resMat)
Show the top results if we use a Wilcoxon rank sum test instead: they are more or less the same.
# Name the Wilcoxon results by gene, then show the 20 smallest p-values with
# their median differences (amp minus notAmp).
cnvGeneNames <- rownames(tumSampCnvMat)
names(wilcoxDir) <- cnvGeneNames
names(wilcoxP) <- cnvGeneNames
top20Wilcox <- order(wilcoxP)[1:20]
print(cbind(wilcoxP[top20Wilcox], wilcoxDir[top20Wilcox]))
## [,1] [,2]
## PNMT 2.806035e-13 -0.1689048
## TCAP 2.806035e-13 -0.1689048
## PGAP3 3.518291e-13 -0.1685115
## MIEN1 8.970819e-13 -0.1668479
## GRB7 1.260173e-12 -0.1696236
## MIR4728 1.484594e-12 -0.1642169
## PPP1R1B 1.764195e-12 -0.1685115
## STARD3 3.783053e-12 -0.1616436
## NEUROD2 7.384699e-12 -0.1633319
## ERBB2 2.054086e-11 -0.1748619
## ZPBP2 1.798013e-10 -0.1631121
## IKZF3 6.236899e-10 -0.1638376
## GSDMB 8.240603e-10 -0.1504689
## ORMDL3 8.741725e-10 -0.1562167
## GSDMA 2.641932e-09 -0.1503128
## LRRC3C 3.164589e-09 -0.1485649
## PSMD3 7.730517e-09 -0.1498768
## CSF3 1.614647e-08 -0.1598568
## SNORD124 6.287083e-08 -0.1485854
## THRA 6.140157e-07 -0.1608690
We have also created a set of results when we condition on ERBB2. When we do this, we identify the secondary drug target EGFR. We also identify a number of ABC transporters, which are known to be involved in multidrug resistance….
names(pCondOnErbb2) <- rownames(tumSampCnvMat)
print(sort(pCondOnErbb2)[1:20])
## ERBB2 EGFR-AS1 ABCA9 ABCA6 RAD52
## 7.580633e-12 3.231632e-04 5.328181e-04 5.341892e-04 9.134247e-04
## ABCA8 PRB3 PRB1 PRB2 POP4
## 1.446654e-03 1.554556e-03 1.557034e-03 1.557034e-03 2.287474e-03
## RP11-434C1.1 LOC284395 ARNTL2 C12orf71 MED21
## 2.918788e-03 3.482868e-03 3.894514e-03 3.929694e-03 3.939667e-03
## MIR4524A MIR4524B ABCA10 MYO18A ETNK1
## 4.024321e-03 4.024321e-03 4.047479e-03 4.060033e-03 4.222564e-03
Create a scatter plot of the predicted sensitivity Vs ERBB2 amplification.
svg(paste(theRootDir, "figures/erbb2VsLapatinib.svg", sep=""), width=4, height=4)
plot(tumSampCnvMat["ERBB2", relevantPatients], brca_preds_with_cnvs, pch=20, col="#00000044", xlab="CNV", ylab="Predicted Lapatinib IC50")
dev.off()
## png
## 2
Create a plot showing that predicted lapatinib sensitivity increases with increasing copy number of ERBB2
# Bin ERBB2 copy number into four groups coded 0 (<1), 1 ([1,2)), 2 ([2,3)) and 3 (>=3).
splitOnCnvNum <- tumSampCnvMat["ERBB2", relevantPatients]
# findInterval() counts how many break points are <= each value, which is the bin
# code; assigning through `[]` keeps the vector's names and storage type.
splitOnCnvNum[] <- findInterval(splitOnCnvNum, c(1, 2, 3))
a <- split(brca_preds_with_cnvs, splitOnCnvNum)
names(a) <- c("<1", "1-2", "2-3", ">3")
svg(paste(theRootDir, "figures/erbb2VsLapatinib_bplot.svg", sep = ""), width = 3, height = 4)
boxplot(
  a,
  col = c("#eff3ff", "#bdd7e7", "#6baed6", "#2171b5"),
  ylab = "Predicted Lapatinib Sensitivity",
  xlab = "Normalized Copy Number",
  pch = 20, cex.axis = .75, outcol = "#00000033"
)
dev.off()
## png
## 2
Plot samples that are HER2 amplified or not amplified against PC1 and PC2 of the corresponding gene expression matrix. This is a supplementary figure.
# Expression data for the BRCA "01A" samples, re-keyed by patient ID and
# restricted to the patients that also have CNV data.
# NOTE(review): pat01A and cancerTypesVec are used as column indices here; this
# assumes tenRuvNewStandardApproach has the same column order as the drug
# prediction matrix — confirm.
brca01aExpr <- tenRuvNewStandardApproach[, intersect(pat01A, which(cancerTypesVec == "BRCA"))]
# Expression sample names are split on a literal "."; the third field is the patient ID.
brca01aExpr_patNames <- sapply(strsplit(colnames(brca01aExpr), ".", fixed=TRUE), function(a)a[3])
colnames(brca01aExpr) <- brca01aExpr_patNames
brca_expr_with_cnvs <- brca01aExpr[, relevantPatients]
# prcomp()$x is already the samples-by-PCs score matrix, so it is used directly;
# applying `$x` to it a second time fails because a matrix is an atomic vector.
pcOut <- prcomp(t(brca_expr_with_cnvs))$x
thePcs <- pcOut
## Error in pcOut$x: $ operator is invalid for atomic vectors
hasErbb2Amp <- as.numeric(tumSampCnvMat["ERBB2", relevantPatients] > 1)
Print the P-values for the association of ERBB2 amplification and of the PCs of the gene expression matrix.
# Wilcoxon rank sum p-value for each of the first 10 PCs, comparing samples with
# and without ERBB2 amplification.
erbb2AmpRows <- which(hasErbb2Amp == 1)
erbb2NotAmpRows <- which(hasErbb2Amp == 0)
for (i in 1:10) {
  pcScores <- thePcs[, i]
  print(wilcox.test(pcScores[erbb2AmpRows], pcScores[erbb2NotAmpRows])$p.value)
}
## Error in wilcox.test(thePcs[, i][which(hasErbb2Amp == 1)], thePcs[, i][which(hasErbb2Amp == : object 'thePcs' not found
Make a plot for the strongest associated PCs, that is PCs 4 and 5.
thecols <- hasErbb2Amp
thecols[thecols == 1] <- "#377eb899"
thecols[thecols == 0] <- "#4daf4a99"
svg(paste(theRootDir, "figures/erbb2AgainstExpressionPcs.svg", sep=""), width=3, height=4)
plot(thePcs[,4], thePcs[,5], col=thecols, pch=1, xlab="PC4", ylab="PC5", cex.axis=.7, las=1)
## Error in plot(thePcs[, 4], thePcs[, 5], col = thecols, pch = 1, xlab = "PC4", : object 'thePcs' not found
legend("bottomleft", inset=.05, title="HER2 Status", c("Amplified","Not Amplified"), fill=c("#377eb899", "#4daf4a99"), cex=0.5)
## Error in strwidth(legend, units = "user", cex = cex, font = text.font): plot.new has not been called yet
dev.off()
## png
## 2
Plot the effect size in the region around the ERBB2 locus. Using this approach, we can identify ERBB2 as the causative gene in this locus.
# First load the GRanges object that was used in the creation of the CNV -> gene mapping, i.e. TxDb.Hsapiens.UCSC.hg19.knownGene
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
library(GenomicFeatures)
geneRanges <- genes(txdb)
library(org.Hs.eg.db)
e2s = toTable(org.Hs.egSYMBOL)
syms <- e2s[, "symbol"]
names(syms) <- e2s[, "gene_id"]
mcols(geneRanges)$gene_sym <- syms[as.character(mcols(geneRanges)$gene_id)]
save(geneRanges, file=paste(theRootDir, "dataIn/geneRangesHg19.RData", sep=""))
theDf <- as.data.frame(geneRanges)
theDf_filt <- theDf[!is.na(theDf$gene_sym), ] # remove the rows with nas for gene symbols
rownames(theDf_filt) <- theDf_filt$gene_sym
resLocs_ <- theDf_filt[genes, c("gene_sym", "seqnames", "start", "end")]
resLocs_2 <- resLocs_[abs(resLocs_$start) < 4.5e7, ] # we want to restrict to the window around erbb2
save(resLocs_2, file=paste(theRootDir, "dataOut/resLocs_2.RData", sep="")) # I will also make this same plot in the GDSC cell line dataset...
startVec <- abs(resLocs_2$start)
endVec <- abs(resLocs_2$end)
midVec <- ((startVec + endVec) / 2)
textVec <- names(resMat[resLocs_2$gene_sym, 2])
yVec <- resMat[resLocs_2$gene_sym, 2]
pVec <- resMat[resLocs_2$gene_sym, 1]
segmentsMat <- cbind(startVec, yVec, endVec, yVec)
# Create the plot that has line segments for the length of the genes
svg(paste(theRootDir, "figures/erbb2LocSegs.svg", sep=""), width=5, height=4)
plot(abs(resLocs_2$start)/1000000, resMat[resLocs_2$gene_sym, 2], xlab="Chromosome 17 Location (megabases)", ylab="Effect Size", las=1, pch=20, col="#ffffff", cex.axis=.7)
segments(startVec/1000000, yVec, endVec/1000000, yVec)
segments(startVec/1000000, yVec+0.0005, startVec/1000000, yVec-0.0005)
segments(endVec/1000000, yVec+0.0005, endVec/1000000, yVec-0.0005)
text(midVec/1000000, yVec+0.0013, textVec, cex=.3)
# lines(xLine/1000000, yLine)
dev.off()
## png
## 2
# Create a similar plot that just has a point at the midpoint of the gene location, this is probably a lot better.
svg(paste(theRootDir, "figures/erbb2LocPoints.svg", sep=""), width=5, height=4)
plot(midVec/1000000, yVec, xlab="Chromosome 17 Location (megabases)", ylab="Effect Size", las=1, pch=20, col="#00000099", cex.axis=.7)
text(midVec/1000000, yVec+0.0013, textVec, cex=.3)
dev.off()
## png
## 2
# Overlay p-values on the plot above with ggplot2 (points drawn with a border, pch = 21).
library(ggplot2)
dat <- data.frame(x=midVec/1000000, y=yVec, Drug=textVec, pVal=-log10(pVec))
# Written to its own file: the borderless variant further down this script also
# opens "figures/Erbb2Loc_points.svg" and would overwrite this figure.
svg(paste(theRootDir, "figures/Erbb2Loc_points_bordered.svg", sep=""), width=6, height=4)
# The points map pVal to `fill`, so the gradient and legend title must be set on
# the fill scale; a colour scale has no effect on this layer.
erbb2LocusPlot <- ggplot(data=dat, aes(x=x, y=y)) +
  theme_bw() +
  geom_point(aes(fill=pVal), size=I(3), pch=21) +
  geom_text(aes(label=Drug), vjust=-.5, hjust=-.24, size=2.5, angle=15) +
  ylab("Lapatinib Effect Size for HER2+ vs HER2-") +
  xlab("Chromosome 17 Location (megabases)") +
  scale_fill_gradient(low="steelblue4", high="tomato2", name="-Log10 P-value") +
  theme(legend.position=c(.9,.2))
# Print explicitly so the figure is drawn even when the script is run with source() or Rscript.
print(erbb2LocusPlot)
dev.off()
## png
## 2
# Different version of this plot (with no borders on the points, possibly harder to see some points)
library(ggplot2)
dat <- data.frame(x=midVec/1000000, y=yVec, Drug=textVec, pVal=-log10(pVec))
svg(paste(theRootDir, "figures/Erbb2Loc_points.svg", sep=""), width=7, height=5)
ggplot(data=dat, aes(x=x, y=y)) + theme_bw() + geom_point(aes(color=pVal), size=I(3)) + geom_text(aes(label=Drug), vjust=-.5, hjust=-.24, size=2.5, angle=15) + ylab("Lapatinib Effect Size for HER2+ vs HER2-") + xlab("Chromosome 17 Location (megabases)") + scale_color_continuous(low="steelblue4",high="tomato2", name="-Log10 P-value") + theme(legend.position=c(.9,.2))
dev.off()
## png
## 2
Apply predictions across all CNVs and drugs for breast cancer samples. Can we identify any novel associations!?
possibleDrugs <- c("A.443654", "A.770041", "ABT.263", "ABT.888", "AG.014699", "AICAR", "AKT.inhibitor.VIII", "AMG.706", "AP.24534", "AS601245", "ATRA", "AUY922", "Axitinib", "AZ628", "AZD.0530", "AZD.2281", "AZD6244", "AZD6482", "AZD7762", "AZD8055", "BAY.61.3606", "Bexarotene", "BI.2536", "BIBW2992", "Bicalutamide", "BI.D1870", "BIRB.0796", "Bleomycin", "BMS.509744", "BMS.536924", "BMS.708163", "BMS.754807", "Bortezomib", "Bosutinib", "Bryostatin.1", "BX.795", "Camptothecin", "CCT007093", "CCT018159", "CEP.701", "CGP.082996", "CGP.60474", "CHIR.99021", "CI.1040", "Cisplatin", "CMK", "Cyclopamine", "Cytarabine", "Dasatinib", "DMOG", "Docetaxel", "Doxorubicin", "EHT.1864", "Elesclomol", "Embelin", "Epothilone.B", "Erlotinib", "Etoposide", "FH535", "FTI.277", "GDC.0449", "GDC0941", "Gefitinib", "Gemcitabine", "GNF.2", "GSK269962A", "GSK.650394", "GW.441756", "GW843682X", "Imatinib", "IPA.3", "JNJ.26854165", "JNK.9L", "JNK.Inhibitor.VIII", "JW.7.52.1", "KIN001.135", "KU.55933", "Lapatinib", "Lenalidomide", "LFM.A13", "Metformin", "Methotrexate", "MG.132", "Midostaurin", "Mitomycin.C", "MK.2206", "MS.275", "Nilotinib", "NSC.87877", "NU.7441", "Nutlin.3a", "NVP.BEZ235", "NVP.TAE684", "Obatoclax.Mesylate", "OSI.906", "PAC.1", "Paclitaxel", "Parthenolide", "Pazopanib", "PD.0325901", "PD.0332991", "PD.173074", "PF.02341066", "PF.4708671", "PF.562271", "PHA.665752", "PLX4720", "Pyrimethamine", "QS11", "Rapamycin", "RDEA119", "RO.3306", "Roscovitine", "Salubrinal", "SB.216763", "SB590885", "Shikonin", "SL.0101.1", "Sorafenib", "S.Trityl.L.cysteine", "Sunitinib", "Temsirolimus", "Thapsigargin", "Tipifarnib", "TW.37", "Vinblastine", "Vinorelbine", "Vorinostat", "VX.680", "VX.702", "WH.4.023", "WO2009093972", "WZ.1.84", "X17.AAG", "X681640", "XMD8.85", "Z.LLNle.CHO", "ZM.447439")
theGeneNames <- rownames(theCnvQuantVecList_mat)
#' Test every gene's copy-number status against one drug's predicted response.
#'
#' For drug `possibleDrugs[k]`, compares predicted response between breast
#' cancer "01A" samples with CNV > 1 ("amp") and CNV < 1 ("notAmp") for each
#' row of the CNV matrix, using a t-test and a Wilcoxon rank sum test.
#'
#' Reads these objects from the enclosing environment: `possibleDrugs`,
#' `theCnvQuantVecList_mat`, `tumorSamps`, `cancerTypesVec`.
#' NOTE(review): `cancerTypesVec` is used as if it were aligned with the columns
#' of `theDrugPredictions` — confirm for every matrix passed in.
#'
#' @param k Index into `possibleDrugs` selecting the drug (row of `theDrugPredictions`).
#' @param theDrugPredictions Matrix with drugs as row names and "-"-separated
#'   sample names as column names.
#' @return Numeric matrix with one row per row of `theCnvQuantVecList_mat` and
#'   columns tTestAmp, tTestDir, wilcoxP, wilcoxDir, numAmpVec, numNotAmpVec.
#'   Genes with 5 or fewer amplified samples are not tested and are all NA.
doPredict <- function(k, theDrugPredictions)
{
  predOnAll_residualized <- theDrugPredictions[possibleDrugs[k], ]

  # CNV data for the tumor samples; drop = FALSE keeps a matrix even for a single sample.
  tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps, drop = FALSE]

  # Match participants in both datasets: field 3 of a sample name is the patient
  # ID, field 4 the sample code.
  predNameFields <- strsplit(names(predOnAll_residualized), "-")
  patPred <- vapply(predNameFields, function(a) a[3], character(1))
  pat01A <- which(vapply(predNameFields, function(a) a[4], character(1)) == "01A")
  patCnv <- vapply(strsplit(colnames(tumSampCnvMat), "-"), function(a) a[3], character(1))
  colnames(tumSampCnvMat) <- patCnv

  isBrca <- cancerTypesVec == "BRCA"
  relevantPatients <- patCnv[patCnv %in% intersect(patPred[pat01A], patPred[isBrca])] # 01A breast cancer patients
  brca01aCols <- intersect(pat01A, which(isBrca))
  brca01aPreds <- predOnAll_residualized[brca01aCols]
  names(brca01aPreds) <- patPred[brca01aCols]
  brca_preds_with_cnvs <- brca01aPreds[relevantPatients]

  # Preallocate with NA so skipped genes still get a row: the row count always
  # equals the number of genes, whichever genes pass the threshold.
  statNames <- c("tTestAmp", "tTestDir", "wilcoxP", "wilcoxDir", "numAmpVec", "numNotAmpVec")
  nGenes <- nrow(tumSampCnvMat)
  outMat <- matrix(NA_real_, nrow = nGenes, ncol = length(statNames),
                   dimnames = list(rownames(theCnvQuantVecList_mat), statNames))

  for (i in seq_len(nGenes)) {
    geneCnv <- tumSampCnvMat[i, relevantPatients]
    amp <- which(geneCnv > 1)
    notAmp <- which(geneCnv < 1)
    if (length(amp) > 5) {
      ampPreds <- brca_preds_with_cnvs[amp]
      notAmpPreds <- brca_preds_with_cnvs[notAmp]
      outMat[i, ] <- c(
        t.test(ampPreds, notAmpPreds)$p.value,      # p-value for t-test between cnved and not cnved
        mean(ampPreds) - mean(notAmpPreds),
        wilcox.test(ampPreds, notAmpPreds)$p.value,
        median(ampPreds) - median(notAmpPreds),
        length(amp),
        length(notAmp)
      )
    }
  }

  print(k) # progress indicator
  return(outMat)
}
library("parallel")
# Run doPredict once per drug, iterating over the drug list itself rather than a fixed count.
allBrcaCnaAssocs <- mclapply(seq_along(possibleDrugs), doPredict, allDrugPredictions_mat, mc.cores=12)
names(allBrcaCnaAssocs) <- possibleDrugs
# mclapply() returns a "try-error" object for a failed job instead of stopping;
# fail here with the drug names rather than later with an obscure indexing error.
failedDrugs <- vapply(allBrcaCnaAssocs, inherits, logical(1), what = "try-error")
if (any(failedDrugs)) {
  stop("doPredict failed for: ", paste(possibleDrugs[failedDrugs], collapse = ", "), call. = FALSE)
}
Find the best association for each drug (these are Wilcoxon rank sum test p-values).
# For every drug, pick the gene with the smallest Wilcoxon p-value (column 3 of
# the association matrix) and record its p-value, median difference (column 4)
# and group sizes (columns 5 and 6). Each stored value is a length-1 vector
# named by the gene.
minGenes <- vector("list", length(possibleDrugs))
minGenesDiff <- vector("list", length(possibleDrugs))
ampNumList <- vector("list", length(possibleDrugs))
notAmpNumList <- vector("list", length(possibleDrugs))
for (i in seq_along(possibleDrugs)) {
  assocMat <- allBrcaCnaAssocs[[possibleDrugs[i]]]
  # order() puts NA p-values last, so the first index is the best tested gene;
  # it is computed once and reused for all four columns.
  bestRow <- order(assocMat[, 3])[1]
  # Index the named column vector (not the matrix cell) so the gene name is kept.
  minGenes[[i]] <- assocMat[, 3][bestRow]
  minGenesDiff[[i]] <- assocMat[, 4][bestRow]
  ampNumList[[i]] <- assocMat[, 5][bestRow]
  notAmpNumList[[i]] <- assocMat[, 6][bestRow]
}
names(minGenes) <- possibleDrugs
names(minGenesDiff) <- possibleDrugs
names(ampNumList) <- possibleDrugs
names(notAmpNumList) <- possibleDrugs
Print the associations in a positive direction (i.e. amplification predictive of resistance).
print(sort(unlist(minGenes[minGenesDiff > 0])))
## Obatoclax.Mesylate.PGAP3 Vinorelbine.LOC728024
## 3.038605e-29 4.436399e-20
## CEP.701.MIEN1 Tipifarnib.LOC728024
## 5.952652e-20 3.867692e-18
## Epothilone.B.MIR4728 KU.55933.PGAP3
## 6.240719e-17 3.116550e-16
## PLX4720.ORMDL3 Z.LLNle.CHO.ERLIN2
## 4.334573e-16 4.465949e-16
## Gemcitabine.LOC728024 PF.562271.LOC728024
## 6.613063e-16 1.111935e-15
## BAY.61.3606.CCND1 Shikonin.PROSC
## 1.801834e-15 2.183088e-15
## Cyclopamine.PROSC Cytarabine.MIEN1
## 2.882756e-15 4.433241e-15
## AZ628.MIR4728 MG.132.OXR1
## 1.706294e-14 1.722290e-14
## X681640.MIEN1 BI.D1870.MIR4737
## 3.340065e-14 3.501819e-14
## Embelin.LOC728024 GSK.650394.SUMO1P1
## 5.361023e-14 6.668509e-14
## AS601245.STARD3 JNK.9L.ERLIN2
## 3.352375e-13 6.997429e-13
## Pyrimethamine.PNMT AZD7762.MIEN1
## 9.330410e-13 1.230506e-12
## AUY922.GPR124 Etoposide.ERLIN2
## 1.321776e-12 1.353113e-12
## Doxorubicin.ERLIN2 Bortezomib.POU5F1B
## 1.577490e-12 1.838159e-12
## Bexarotene.ZFPM2 NU.7441.LOC653653
## 4.827938e-12 4.931655e-12
## CI.1040.IKZF3 Roscovitine.GDPD4
## 1.780560e-11 2.606216e-11
## NSC.87877.PROSC RDEA119.TRPS1
## 3.137257e-11 3.514339e-11
## Mitomycin.C.PCK1 RO.3306.PNMT
## 6.527495e-11 6.975004e-11
## CHIR.99021.CDH17 GDC.0449.MIEN1
## 9.433678e-11 1.070058e-10
## BX.795.APPBP2 AG.014699.STARD3
## 1.602976e-10 1.761085e-10
## AICAR.PGAP3 Vinblastine.MCM4
## 2.037792e-10 2.522408e-10
## SB.216763.FAM91A1 Bosutinib.ERBB2
## 2.544441e-10 2.576029e-10
## AP.24534.STARD3 Thapsigargin.SUMO1P1
## 3.658541e-10 4.626154e-10
## AKT.inhibitor.VIII.LOC728024 QS11.ZNF217
## 7.789714e-10 8.292796e-10
## AZD.2281.SHANK2-AS1 TW.37.MIEN1
## 1.212760e-09 1.488899e-09
## CCT018159.ZNF703 Docetaxel.PGAP3
## 1.639920e-09 1.905954e-09
## Bleomycin.PCK1 ZM.447439.MYEOV
## 2.105569e-09 4.293971e-09
## NVP.BEZ235.MIR4737 PD.0332991.MIEN1
## 4.974007e-09 5.078702e-09
## AZD6244.GSDMB Cisplatin.EIF4EBP1
## 7.129934e-09 7.923850e-09
## Axitinib.MIEN1 Methotrexate.ERBB2
## 1.151465e-08 1.376423e-08
## GNF.2.C17orf64 Gefitinib.ORMDL3
## 5.313810e-08 5.666531e-08
## Midostaurin.MIEN1 XMD8.85.ADRB3
## 5.872991e-08 6.299181e-08
## Parthenolide.KAT6A CGP.60474.EIF4EBP1
## 6.310236e-08 6.744795e-08
## Camptothecin.NTSR1 BMS.754807.ZNF217
## 2.860432e-07 3.818678e-07
## DMOG.FGF4 IPA.3.ERLIN2
## 3.896414e-07 4.407799e-07
## SB590885.ERBB2 Erlotinib.MIEN1
## 7.220367e-07 7.846876e-07
## BMS.536924.GDPD4 PAC.1.ERLIN2
## 1.566439e-06 1.800310e-06
## FTI.277.TRPS1 WO2009093972.LOC100129361
## 1.937772e-06 2.343748e-06
## FH535.LOC728024 Bryostatin.1.TRPS1
## 4.996826e-06 6.630162e-06
## Paclitaxel.EIF4EBP1 PD.0325901.WDYHV1
## 7.564669e-06 1.334315e-05
## Rapamycin.ERP27 BMS.509744.CLNS1A
## 1.849792e-05 2.149158e-05
## X17.AAG.TAS2R50 Sunitinib.GDPD4
## 3.645146e-05 4.735013e-05
## LFM.A13.ACER3 Metformin.SPAG4
## 6.134401e-05 1.017338e-04
## Dasatinib.APPBP2 WH.4.023.C1orf111
## 1.887158e-04 1.904943e-04
## Pazopanib.ZNF420 ABT.888.KDM2A
## 3.985901e-04 4.268825e-04
## GW843682X.PROSC GSK269962A.MIR101-2
## 5.257780e-04 5.446034e-04
## PF.02341066.ETV6 BI.2536.WHSC1L1
## 6.313841e-04 7.406662e-04
## NVP.TAE684.PRR4
## 1.760088e-03
Print the associations in a negative direction, i.e. amplification predictive of drug sensitivity.
print(sort(unlist(minGenes[minGenesDiff < 0])))
## CGP.082996.CSMD3 AMG.706.PROSC
## 1.612064e-17 3.157053e-17
## GW.441756.ORAOV1 EHT.1864.MIR378D2
## 1.619676e-15 2.167591e-15
## MK.2206.PGAP3 A.443654.STARD3
## 3.191264e-15 4.365148e-15
## BIRB.0796.SLC26A7 PD.173074.EBAG9
## 1.677502e-14 2.091675e-14
## WZ.1.84.PNMT PF.4708671.SLC26A7
## 5.523420e-14 1.424001e-13
## Lapatinib.PNMT AZD6482.PGAP3
## 2.806035e-13 5.947210e-13
## JNK.Inhibitor.VIII.RAD54B Elesclomol.STK3
## 9.880550e-12 1.884021e-11
## GDC0941.STK3 MS.275.SAMD12
## 5.731972e-11 6.481744e-11
## Nutlin.3a.MIR21 Salubrinal.MIR661
## 1.070968e-10 2.353915e-10
## Sorafenib.RNFT1 CCT007093.CHAD
## 5.235713e-10 3.708401e-09
## BMS.708163.LOC728024 VX.702.PPM1D
## 4.138621e-09 6.243912e-09
## CMK.CSMD3 JW.7.52.1.STARD3
## 6.498673e-09 1.250637e-08
## BIBW2992.ACER3 SL.0101.1.GRHL2
## 1.516977e-08 4.910921e-07
## AZD8055.STK3 A.770041.PPP1R1B
## 5.326922e-07 6.314740e-07
## AZD.0530.SEZ6 S.Trityl.L.cysteine.FBXO32
## 8.060248e-07 8.116948e-07
## Lenalidomide.ZFAT-AS1 Temsirolimus.MIR378D2
## 9.756524e-07 2.748206e-06
## OSI.906.RP11-434C1.1 ABT.263.MIR3662
## 3.512938e-06 5.466745e-06
## Bicalutamide.ACSF2 Nilotinib.RDH10
## 1.338896e-05 1.617286e-05
## Vorinostat.LOC100129361 VX.680.LOC100129361
## 2.346588e-05 2.357488e-05
## ATRA.FLJ46284 Imatinib.ZNF274
## 2.738357e-05 4.973905e-05
## KIN001.135.NEUROD2 JNJ.26854165.STK3
## 2.147513e-04 5.170602e-04
## PHA.665752.PTK7
## 6.168052e-04
Make a table of the top association for every drug, with drug, gene, chromosome, chromosome location, P-value, effect size, number of amplified samples, and number of non-amplified samples.
genes <- sapply(minGenes, names)
library(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
## [1] "ENTREZID" "PFAM" "IPI" "PROSITE"
## [5] "ACCNUM" "ALIAS" "CHR" "CHRLOC"
## [9] "CHRLOCEND" "ENZYME" "MAP" "PATH"
## [13] "PMID" "REFSEQ" "SYMBOL" "UNIGENE"
## [17] "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "GENENAME"
## [21] "UNIPROT" "GO" "EVIDENCE" "ONTOLOGY"
## [25] "GOALL" "EVIDENCEALL" "ONTOLOGYALL" "OMIM"
## [29] "UCSCKG"
columns(org.Hs.eg.db)
## [1] "ENTREZID" "PFAM" "IPI" "PROSITE"
## [5] "ACCNUM" "ALIAS" "CHR" "CHRLOC"
## [9] "CHRLOCEND" "ENZYME" "MAP" "PATH"
## [13] "PMID" "REFSEQ" "SYMBOL" "UNIGENE"
## [17] "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "GENENAME"
## [21] "UNIPROT" "GO" "EVIDENCE" "ONTOLOGY"
## [25] "GOALL" "EVIDENCEALL" "ONTOLOGYALL" "OMIM"
## [29] "UCSCKG"
# Look up chromosome and location for each drug's top gene, keeping the first
# annotation row per symbol.
resLocs <- select(org.Hs.eg.db, keys= genes, columns = c("SYMBOL","CHR","CHRLOC","CHRLOCEND"), keytype = "SYMBOL")
resNodups <- resLocs[!duplicated(resLocs[, "SYMBOL"]),]
rownames(resNodups) <- resNodups[,1]
chr <- resNodups[genes, 2]
chrLoc <- resNodups[genes, 3]
# One row per drug, sorted by p-value.
df <- data.frame(drug=names(minGenes), genes=genes, chr=chr, chrLoc=chrLoc, pVals=unlist(minGenes), beta=unlist(minGenesDiff), numAmp=unlist(ampNumList), numNotAmp=unlist(notAmpNumList))
dfOrd <- df[order(df[,"pVals"]), ]
# write.table() cannot create missing directories and then fails with "cannot
# open the connection"; make sure the output directory exists first.
dir.create(paste(theRootDir, "tables", sep=""), showWarnings=FALSE, recursive=TRUE)
write.table(dfOrd, file=paste(theRootDir, "tables/top_cnv_drug_assocs_brca.txt", sep=""), row.names=FALSE)
## Error in file(file, ifelse(append, "a", "w")): cannot open the connection
Make the plot of Effect-size / P-values for Vinorelbine in the ERLIN2 locus.
topAssocsVbine <- allBrcaCnaAssocs[["Vinorelbine"]][order(allBrcaCnaAssocs[["Vinorelbine"]][,3]), ][1:40,]
resLocs_vBine <- theDf_filt[rownames(topAssocsVbine), c("gene_sym", "seqnames", "start", "end")]
topLocusGenes_vBine <- na.omit(rownames(resLocs_vBine)[resLocs_vBine[, "seqnames"] == "chr8"]) # select out the genes in the region of LOC728024 on chromosome 8, i.e. the region of the strongest association...
resLocs_vBine_2 <- resLocs_vBine[topLocusGenes_vBine, ]
save(resLocs_vBine_2, file=paste(theRootDir, "dataOut/resLocs_vBine_2.RData", sep=""))
Overlay p-values on the plot above with GGplot2….
midVec <- ((resLocs_vBine[topLocusGenes_vBine,]$end + resLocs_vBine[topLocusGenes_vBine,]$start) / 2)
yVec <- topAssocsVbine[topLocusGenes_vBine, 2]
textVec <- topLocusGenes_vBine
pVec <- topAssocsVbine[topLocusGenes_vBine, 1]
dat <- data.frame(x=midVec/1000000, y=yVec, Drug=textVec, pVal=-log10(pVec))
svg(paste(theRootDir, "figures/Erlin2Loc_points.svg", sep=""), width=7, height=5)
ggplot(data=dat, aes(x=x, y=y)) + theme_bw() + geom_point(aes(color=pVal), size=I(3)) + geom_text(aes(label=Drug), vjust=-.5, hjust=-.24, size=2.5, angle=15) + ylab("Vinorelbine Effect Size for ERLIN2 Amplified vs Not Amplified") + xlab("Chromosome 8 Location (megabases)") + scale_color_continuous(low="steelblue4",high="tomato2", name="-Log10 P-value") + theme(legend.position=c(.9,.8))
dev.off()
## png
## 2
Create a scatter plot of the predicted vinorelbine sensitivity Vs ERLIN2 amplification.
predOnAll_residualized <- allDrugPredictions_mat["Vinorelbine", ]
tumSampCnvMat <- theCnvQuantVecList_mat[, tumorSamps]
oLapSamps <- names(predOnAll_residualized)[names(predOnAll_residualized) %in% colnames(tumSampCnvMat)]
patPred <- sapply(strsplit(names(predOnAll_residualized), "-"), function(a)a[3])
pat01A <- which(sapply(strsplit(names(predOnAll_residualized), "-"), function(a)a[4]) == "01A")
patCnv <- sapply(strsplit(colnames(tumSampCnvMat), "-"), function(a)a[3])
colnames(tumSampCnvMat) <- patCnv
relevantPatients <- patCnv[patCnv %in% intersect(patPred[pat01A], patPred[cancerTypesVec == "BRCA"])] # 01A breast cancer patients
brca01aPreds <- predOnAll_residualized[intersect(pat01A, which(cancerTypesVec == "BRCA"))]
brca01aPreds_patNames <- sapply(strsplit(names(brca01aPreds), "-"), function(a)a[3])
names(brca01aPreds) <- brca01aPreds_patNames
brca_vbine_preds_with_cnvs <- brca01aPreds[relevantPatients]
svg(paste(theRootDir, "figures/erlin2vsVinorelbine.svg", sep=""), width=4, height=4)
plot(tumSampCnvMat["ERLIN2", relevantPatients], brca_vbine_preds_with_cnvs, pch=20, col="#00000044", xlab="ERLIN2 CNV", ylab="Predicted Vinorelbine IC50")
dev.off()
## png
## 2
Create a boxplot stratifying by CNV for vinorelbine and ERLIN2
# Bin ERLIN2 copy number into four groups coded 0 (<1), 1 ([1,2)), 2 ([2,3)) and 3 (>=3).
splitOnCnvNum <- tumSampCnvMat["ERLIN2", relevantPatients]
# findInterval() counts how many break points are <= each value, which is the bin
# code; assigning through `[]` keeps the vector's names and storage type.
splitOnCnvNum[] <- findInterval(splitOnCnvNum, c(1, 2, 3))
a <- split(brca_vbine_preds_with_cnvs, splitOnCnvNum)
names(a) <- c("<1", "1-2", "2-3", ">3")
svg(paste(theRootDir, "figures/erlin2VsVinorelbine_bplot.svg", sep = ""), width = 3, height = 4)
boxplot(
  a,
  col = c("#eff3ff", "#bdd7e7", "#6baed6", "#2171b5"),
  ylab = "Predicted Vinorelbine Sensitivity",
  xlab = "Normalized Copy Number",
  pch = 20, cex.axis = .75, outcol = "#00000033"
)
dev.off()
## png
## 2
load(file=paste(theRootDir, "dataOut/brcaDrugPredsAll.RData", sep="")) # allSizesOut contains the breast cancer specific predictions (correlate with cancer type )
names(allSizesOut) <- possibleDrugs
# Stack the per-drug predictions into a drugs-by-samples matrix.
brcaDrugMat <- do.call(rbind, allSizesOut)
colnames(brcaDrugMat) <- gsub(".", "-", colnames(brcaDrugMat), fixed=TRUE)
# Run doPredict once per drug, iterating over the drug list itself rather than a fixed count.
allBrcaCnaAssocs_onlyBrcaPreds <- mclapply(seq_along(possibleDrugs), doPredict, brcaDrugMat, mc.cores=12)
names(allBrcaCnaAssocs_onlyBrcaPreds) <- possibleDrugs
# mclapply() returns a "try-error" object for a failed job instead of stopping;
# fail here with the drug names rather than later with an obscure indexing error.
failedBrcaOnlyDrugs <- vapply(allBrcaCnaAssocs_onlyBrcaPreds, inherits, logical(1), what = "try-error")
if (any(failedBrcaOnlyDrugs)) {
  stop("doPredict failed for: ", paste(possibleDrugs[failedBrcaOnlyDrugs], collapse = ", "), call. = FALSE)
}
Find the best association for each drug (these are Wilcoxon rank sum test p-values).
# For every drug, pick the gene with the smallest Wilcoxon p-value (column 3 of
# the BRCA-only association matrix) and record its p-value, median difference
# (column 4) and group sizes (columns 5 and 6). Each stored value is a length-1
# vector named by the gene.
minGenes <- vector("list", length(possibleDrugs))
minGenesDiff <- vector("list", length(possibleDrugs))
ampNumList <- vector("list", length(possibleDrugs))
notAmpNumList <- vector("list", length(possibleDrugs))
for (i in seq_along(possibleDrugs)) {
  assocMat <- allBrcaCnaAssocs_onlyBrcaPreds[[possibleDrugs[i]]]
  # order() puts NA p-values last, so the first index is the best tested gene;
  # it is computed once and reused for all four columns.
  bestRow <- order(assocMat[, 3])[1]
  # Index the named column vector (not the matrix cell) so the gene name is kept.
  minGenes[[i]] <- assocMat[, 3][bestRow]
  minGenesDiff[[i]] <- assocMat[, 4][bestRow]
  ampNumList[[i]] <- assocMat[, 5][bestRow]
  notAmpNumList[[i]] <- assocMat[, 6][bestRow]
}
names(minGenes) <- possibleDrugs
names(minGenesDiff) <- possibleDrugs
names(ampNumList) <- possibleDrugs
names(notAmpNumList) <- possibleDrugs
Print the associations in a positive direction (i.e. amplification predictive of resistance).
# Keep the drugs whose best association has a positive value in minGenesDiff,
# then show their p-values from smallest to largest.
positiveAssocs <- minGenes[minGenesDiff > 0]
print(sort(unlist(positiveAssocs)))
## Roscovitine.MIR4728 Bexarotene.GRHL2 PF.562271.LOC728024
## 7.580255e-12 3.219609e-10 3.708790e-10
## LFM.A13.ERLIN2 Tipifarnib.PPP1R1B AZ628.NGFR
## 6.731744e-10 8.475452e-10 2.737039e-09
## AP.24534.PGAP3 Salubrinal.CDK12 GDC.0449.MIR4728
## 9.659584e-09 1.448147e-08 1.464785e-08
## AS601245.PPP1R1B AKT.inhibitor.VIII.PROSC CGP.60474.C8orf87
## 1.921149e-08 3.070745e-08 3.367411e-08
## BMS.754807.ZMAT4 Vinblastine.MIR4661 Bortezomib.POU5F1B
## 4.148035e-08 5.124794e-08 5.153286e-08
## Docetaxel.LINC00534 Cyclopamine.ANO1 ZM.447439.MRGPRF
## 5.685914e-08 6.783259e-08 7.533856e-08
## AZD6244.MED1 PLX4720.SNORD124 Obatoclax.Mesylate.IKZF3
## 1.047054e-07 1.579737e-07 2.731635e-07
## Cisplatin.RNF170 CEP.701.NR1D1 JNK.9L.LOC728024
## 2.832690e-07 3.064566e-07 3.441460e-07
## Bosutinib.ERBB2 ABT.888.STARD3 XMD8.85.GOLGA7
## 3.614828e-07 3.734773e-07 3.775054e-07
## Cytarabine.MIR486 DMOG.SMIM19 Erlotinib.ERLIN2
## 4.430264e-07 4.443065e-07 4.462916e-07
## BX.795.ANK1 NVP.TAE684.MIR486 Etoposide.FGF4
## 5.212073e-07 5.863721e-07 5.949510e-07
## AZD.2281.MIR4728 AZD6482.MIR4729 CI.1040.FGF3
## 6.323958e-07 7.075624e-07 1.003976e-06
## Bryostatin.1.ERLIN2 RDEA119.GRHL2 CCT018159.C8orf86
## 1.105213e-06 1.496259e-06 1.711455e-06
## Imatinib.MYC Gefitinib.GSDMB Rapamycin.MYC
## 1.858662e-06 1.914926e-06 2.128805e-06
## Dasatinib.MIR486 GSK269962A.ZFAT-AS1 SB.216763.LRP12
## 2.554019e-06 2.871777e-06 2.887660e-06
## CHIR.99021.C8orf46 Parthenolide.NKX6-3 AUY922.PROSC
## 2.946100e-06 3.200228e-06 3.486222e-06
## CCT007093.MYC KIN001.135.TRPS1 MG.132.ZFPM2
## 3.759236e-06 4.304591e-06 6.349284e-06
## AG.014699.GINS4 Z.LLNle.CHO.LOC100130964 Elesclomol.ERBB2
## 7.872371e-06 1.085015e-05 1.154350e-05
## A.770041.NKX6-3 Thapsigargin.NKX6-3 Gemcitabine.RNF170
## 1.363249e-05 1.472352e-05 1.531667e-05
## NSC.87877.MYEOV PD.0325901.PCGF2 NU.7441.ALDH3B2
## 2.076468e-05 2.124136e-05 2.619948e-05
## TW.37.ZFAT-AS1 NVP.BEZ235.SLC16A6 Sunitinib.NKX6-3
## 2.829778e-05 2.881948e-05 3.850261e-05
## Pazopanib.MED30 Doxorubicin.CA4 BMS.536924.SOX13
## 4.326583e-05 4.614227e-05 4.947626e-05
## Midostaurin.SLC16A6 PF.02341066.MIR486 Bicalutamide.MYB
## 5.194957e-05 6.363689e-05 6.790317e-05
## GDC0941.LOC440461 SB590885.GSDMB BMS.509744.ZMAT4
## 7.010826e-05 7.569821e-05 7.783580e-05
## WH.4.023.NKX6-3 Methotrexate.B4GALNT2 Camptothecin.UNC93B1
## 7.983603e-05 8.077737e-05 9.572956e-05
## Mitomycin.C.CTSZ AZD7762.ALDH3B2 VX.680.NKX6-3
## 9.952987e-05 1.035293e-04 1.086255e-04
## FTI.277.MCMDC2 BAY.61.3606.PPFIA1 GNF.2.LOC100130964
## 1.197050e-04 1.199934e-04 1.229951e-04
## Epothilone.B.PROSC OSI.906.ZMAT4 Nutlin.3a.MYC
## 1.311228e-04 1.630970e-04 1.632368e-04
## PHA.665752.SFRP1 BI.2536.ZMAT4 Paclitaxel.ERLIN2
## 1.696289e-04 1.736782e-04 1.756232e-04
## Pyrimethamine.PNMT Temsirolimus.ZNF420 X17.AAG.BYSL
## 1.801079e-04 2.172288e-04 2.237513e-04
## JW.7.52.1.SOX13 Embelin.BAALC Bleomycin.UNC13D
## 2.390153e-04 2.407707e-04 2.607394e-04
## FH535.P4HA3 AZD8055.ZNF420 X681640.UNC93B1
## 2.895547e-04 3.583911e-04 3.788457e-04
## PD.0332991.MED1 Shikonin.TRPS1 GSK.650394.ERBB2
## 3.922937e-04 4.389813e-04 4.934543e-04
## Vinorelbine.MIR298 WO2009093972.MYB GW843682X.STAR
## 4.952036e-04 7.714904e-04 8.381963e-04
## MK.2206.MIR9-1 QS11.C17orf82 JNJ.26854165.TACC1
## 9.393943e-04 1.049329e-03 1.060024e-03
## Nilotinib.MEF2D Metformin.MIR1204 Axitinib.ZFAT-AS1
## 1.150354e-03 3.508275e-03 3.920953e-03
## PAC.1.UNC13D
## 5.548849e-03
Print the associations in a negative direction, i.e. amplification predictive of drug sensitivity.
# Keep the drugs whose best association has a negative value in minGenesDiff,
# then show their p-values from smallest to largest.
negativeAssocs <- minGenes[minGenesDiff < 0]
print(sort(unlist(negativeAssocs)))
## A.443654.NEUROD2 PF.4708671.LOC101241902
## 1.024578e-11 1.157782e-09
## Lapatinib.PGAP3 JNK.Inhibitor.VIII.ORAOV1
## 1.733334e-08 2.806220e-08
## GW.441756.ORAOV1 WZ.1.84.PGAP3
## 6.468165e-08 2.167807e-07
## AZD.0530.PGAP3 BIRB.0796.LOC100130964
## 2.323441e-06 3.381639e-06
## AMG.706.C8orf4 EHT.1864.ANK1
## 1.217321e-05 1.222739e-05
## BIBW2992.ALDH3B2 PD.173074.SBSPON
## 1.482095e-05 1.628961e-05
## BMS.708163.TRIQK ABT.263.ADRB3
## 1.705820e-05 2.564219e-05
## CMK.PVT1 Lenalidomide.RUNX1T1
## 5.549117e-05 6.770081e-05
## CGP.082996.IQGAP3 Sorafenib.LRP5
## 2.086722e-04 2.218286e-04
## MS.275.LINC00662 Vorinostat.TAOK1
## 2.632803e-04 2.810888e-04
## BI.D1870.URI1 KU.55933.MYC
## 3.885451e-04 4.817703e-04
## ATRA.MRGPRF RO.3306.CCNE1
## 5.257386e-04 5.318454e-04
## VX.702.PPM1D IPA.3.DCAF4L2
## 6.826387e-04 8.598105e-04
## SL.0101.1.MYC AICAR.NOS2
## 1.204003e-03 1.889377e-03
## S.Trityl.L.cysteine.LOC284395
## 2.334671e-03
#' What do the correlations between these two sets of drug predictions actually look like across the dataset? They are quite highly correlated.
# Compare, drug by drug, the predictions in allDrugPredictions_mat with the
# predictions in brcaDrugMat for the same samples.
# Restrict allDrugPredictions_mat to the samples (columns) of brcaDrugMat, in
# the same column order.
allDrugPredictions_mat_brca <- allDrugPredictions_mat[, colnames(brcaDrugMat)]

# Preallocate one slot per drug instead of growing the vectors inside the loop.
nDrugRows <- nrow(allDrugPredictions_mat_brca)
theCors <- numeric(nDrugRows)
theCorPs <- numeric(nDrugRows)

# NOTE(review): row i of both matrices is assumed to refer to the same drug;
# only the columns are aligned by name above -- confirm the row order matches.
for(i in seq_len(nDrugRows))
{
  # Spearman rank correlation and its p-value for drug i.
  theCors[i] <- cor(allDrugPredictions_mat_brca[i,], brcaDrugMat[i,], method="spearman")
  theCorPs[i] <- cor.test(allDrugPredictions_mat_brca[i,], brcaDrugMat[i,], method="spearman")$p.value
}
names(theCors) <- rownames(allDrugPredictions_mat_brca)
names(theCorPs) <- rownames(allDrugPredictions_mat_brca)
The median correlation is high, and the median p-value shows the correlations are highly significant.
# Median of the per-drug Spearman correlations.
medianCor <- median(theCors)
print(medianCor)
## [1] 0.6811614
# Median of the per-drug correlation p-values.
medianCorP <- median(theCorPs)
print(medianCorP)
## [1] 0
Create a histogram of these correlations
# Write a histogram of the per-drug Spearman correlations to an SVG file.
corHistFile <- paste0(theRootDir, "figures/allOrBcHist.svg")
svg(corHistFile, width=4, height=4)
hist(theCors, breaks=20, col="black", main="", xlab="Spearman Correlation", las=1, cex.axis=.75)
dev.off()
## png
## 2
Session Info
# Record the R version, platform and package versions used for this run.
currentSession <- sessionInfo()
print(currentSession)
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.3 LTS
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_2.2.1
## [2] org.Hs.eg.db_2.14.0
## [3] RSQLite_1.0.0
## [4] DBI_0.3.1
## [5] XVector_0.4.0
## [6] TxDb.Hsapiens.UCSC.hg19.knownGene_2.14.0
## [7] GenomicFeatures_1.16.3
## [8] AnnotationDbi_1.26.1
## [9] Biobase_2.24.0
## [10] GenomicRanges_1.16.4
## [11] GenomeInfoDb_1.0.2
## [12] IRanges_1.22.10
## [13] BiocGenerics_0.10.0
## [14] pRRophetic_0.5
## [15] knitr_1.12.3
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.9 lattice_0.20-33
## [3] corpcor_1.6.8 Rsamtools_1.16.1
## [5] Biostrings_2.32.1 assertthat_0.1
## [7] digest_0.6.8 foreach_1.4.3
## [9] ridge_2.1-3 plyr_1.8.4
## [11] BatchJobs_1.6 MatrixModels_0.4-1
## [13] stats4_3.2.2 evaluate_0.8.3
## [15] sva_3.10.0 zlibbioc_1.10.0
## [17] lazyeval_0.2.0 minqa_1.2.4
## [19] annotate_1.42.1 SparseM_1.7
## [21] car_2.1-0 nloptr_1.0.4
## [23] Matrix_1.2-2 checkmate_1.6.2
## [25] preprocessCore_1.26.1 labeling_0.3
## [27] splines_3.2.2 lme4_1.1-10
## [29] BiocParallel_0.6.1 stringr_1.0.0
## [31] munsell_0.4.3 RCurl_1.95-4.7
## [33] biomaRt_2.20.0 sendmailR_1.2-1
## [35] rtracklayer_1.24.2 base64enc_0.1-3
## [37] mgcv_1.8-7 BBmisc_1.9
## [39] nnet_7.3-11 fail_1.3
## [41] tibble_1.2-15 codetools_0.2-14
## [43] XML_3.98-1.3 GenomicAlignments_1.0.6
## [45] MASS_7.3-44 bitops_1.0-6
## [47] grid_3.2.2 nlme_3.1-122
## [49] xtable_1.7-4 gtable_0.2.0
## [51] magrittr_1.5 scales_0.4.1
## [53] stringi_1.0-1 genefilter_1.46.1
## [55] brew_1.0-6 iterators_1.0.8
## [57] tools_3.2.2 BSgenome_1.32.0
## [59] markdown_0.7.7 pbkrtest_0.4-2
## [61] survival_2.40-1 colorspace_1.2-6
## [63] quantreg_5.19