I now want to investigate associations between mutations and drug sensitivity across the entire TCGA dataset. I will summarize the mutations on a per-gene basis, i.e. I will call a gene “mutated” if there is any protein-coding change anywhere in the gene. Load the somatic mutation data from TCGA; this data is from exome sequencing and was obtained from firebrowse.org. First I need to create a matrix of genes by samples and indicate whether samples do / don't have somatic mutations, i.e. summarize the mutation data by gene.
# Root directory of the project; inputs are read from "dataIn/" and results are written to "tables/" and "figures/".
theRootDir <- "/mnt/data_scratch/finalData/"
dir.create(paste0(theRootDir, "tables/"), showWarnings = FALSE)
# Everything found in the mutation data directory (sub-folders plus the downloaded archives).
dirList <- dir(paste0(theRootDir, "dataIn/mutation_data/"))
# Keep only the entries that are not ".tar.gz" archives.
# A logical mask is used instead of dirList[-grep(...)]: with a negative index, "no match" yields
# integer(0) and dirList[-integer(0)] silently returns an EMPTY vector rather than everything.
# fixed = TRUE makes the dots literal instead of regex wildcards.
subfolders <- dirList[!grepl(".tar.gz", dirList, fixed = TRUE)]
# Walk every sub-folder and read each per-sample mutation file it contains.
# Results (indexed [[sub-folder]][[file]] until they are flattened at the end):
#   mutsListAll                - Variant_Classification of every listed mutation, named by Hugo_Symbol
#   genesWithProteinChangeList - unique gene symbols with at least one protein-changing mutation
#   allFileNames               - file names in the order they were read (used later to derive sample IDs)
mutsListAll <- list() # a list of mutations occurring in each sample.
allFileNames <- character()
# Variant classifications that are counted as a protein-coding change.
proteinChangingMutations <- c("Missense_Mutation", "Nonsense_Mutation", "Frame_Shift_Del", "In_Frame_Del", "Frame_Shift_Ins", "In_Frame_Ins", "Nonstop_Mutation", "De_novo_Start_OutOfFrame", "De_novo_Start_InFrame", "Missense", "Read-through", "Indel")
genesWithProteinChangeList <- list()
mutationDataDir <- paste0(theRootDir, "dataIn/mutation_data/")
# seq_along() (not 1:length()) so that an empty folder list / empty folder is skipped instead of
# iterating over c(1, 0).
for(i in seq_along(subfolders))
{
  subFolderPath <- paste0(mutationDataDir, subfolders[i])
  # Drop the first listed entry (getting rid of manifest.txt).
  # NOTE(review): this relies on the manifest sorting first in dir()'s output - confirm for every folder.
  subFolderFiles <- dir(subFolderPath)[-1]
  allFileNames <- c(allFileNames, subFolderFiles)
  mutsListAll[[i]] <- list()
  genesWithProteinChangeList[[i]] <- list()
  # now for each file in this subfolder, pull out a list of genes with somatic mutations
  for(j in seq_along(subFolderFiles))
  {
    mutfile <- read.delim(paste0(subFolderPath, "/", subFolderFiles[j]), as.is=TRUE)
    variantType <- mutfile[ ,"Variant_Classification"]
    theGenes <- mutfile[ ,"Hugo_Symbol"]
    names(variantType) <- theGenes
    mutsListAll[[i]][[j]] <- variantType
    # as.character() guarantees a (possibly empty) character vector; assigning NULL to a list slot
    # would not create the slot and could shift later samples out of position.
    genesWithProteinChangeList[[i]][[j]] <- as.character(unique(names(variantType[variantType %in% proteinChangingMutations])))
  }
}
# Gene universe: every gene symbol that appears with any variant classification in any file.
allMutatedGenes <- unique(names(unlist(mutsListAll)))
# Frequency of each variant classification over all files.
mutationTypes <- table(unlist(mutsListAll))
# Flatten one level so that element k corresponds to the k-th entry of allFileNames.
mutsListAll_unlist <- unlist(mutsListAll, recursive=FALSE)
genesWithProteinChangeList_unlist <- unlist(genesWithProteinChangeList, recursive=FALSE)
List the frequency of the different types of mutations catalogued in the files above.
# Show the variant classifications from most to least frequent.
print(sort(mutationTypes, decreasing=TRUE))
##
## Missense_Mutation Silent Nonsense_Mutation
## 1094331 440407 82280
## Frame_Shift_Del RNA Splice_Site
## 51462 42086 34546
## Frame_Shift_Ins In_Frame_Del R
## 22463 10479 2958
## In_Frame_Ins Intron Nonstop_Mutation
## 2088 1817 1152
## Translation_Start_Site Splice_Site_SNP De_novo_Start_OutOfFrame
## 1118 407 324
## IGR 3'UTR 5'UTR
## 210 133 84
## De_novo_Start_InFrame Missense 5'Flank
## 44 44 36
## Splice_Site_Del Read-through Targeted_Region
## 33 20 16
## Indel Splice_Site_Ins
## 13 7
From mutsListAll we can then create a matrix indicating if the gene has a coding mutation.
# Sample ID = the part of each file name before the first ".".
fileNameParts <- strsplit(allFileNames, ".", fixed=TRUE)
tcgaIdsAll <- sapply(fileNameParts, function(parts) parts[[1]])
# All-zero genes x samples matrix; entries are switched to 1 later for genes with a coding change.
mutMat <- matrix(0, nrow=length(allMutatedGenes), ncol=length(tcgaIdsAll))
rownames(mutMat) <- allMutatedGenes
colnames(mutMat) <- tcgaIdsAll
print(mutMat[1:5, 1:5])
## TCGA-OR-A5J1-01 TCGA-OR-A5J2-01 TCGA-OR-A5J3-01 TCGA-OR-A5J4-01
## PTCHD2 0 0 0 0
## VPS13D 0 0 0 0
## PHC2 0 0 0 0
## LCE1B 0 0 0 0
## LCE1A 0 0 0 0
## TCGA-OR-A5J5-01
## PTCHD2 0
## VPS13D 0
## PHC2 0
## LCE1B 0
## LCE1A 0
# Keep a copy of the matrix while it is still all zeros (it is populated in the next step).
# NOTE(review): mutMatTest is not referenced again in the visible part of this file - confirm it is still needed.
mutMatTest <- mutMat
Now populate this matrix: set an entry to 1 if the gene has a protein-changing mutation in that sample, and leave it at 0 otherwise.
# Column i of mutMat and element i of genesWithProteinChangeList_unlist must describe the same
# file; fail loudly if the two ever get out of step instead of silently mislabelling samples.
stopifnot(length(genesWithProteinChangeList_unlist) == length(tcgaIdsAll))
# seq_along() avoids the c(1, 0) iteration that 1:length() produces for an empty sample list.
for(i in seq_along(tcgaIdsAll))
{
  # Rows are addressed by gene symbol; the scalar 1 is recycled (a sample with no genes is a no-op).
  mutMat[genesWithProteinChangeList_unlist[[i]], i] <- 1
}
print(mutMat[1:5, 1:5])
## TCGA-OR-A5J1-01 TCGA-OR-A5J2-01 TCGA-OR-A5J3-01 TCGA-OR-A5J4-01
## PTCHD2 0 1 0 0
## VPS13D 0 0 0 0
## PHC2 0 0 0 0
## LCE1B 1 0 0 0
## LCE1A 0 0 0 0
## TCGA-OR-A5J5-01
## PTCHD2 0
## VPS13D 0
## PHC2 0
## LCE1B 0
## LCE1A 0
What are the most commonly mutated genes, is this consistent with expectation? Yes.
# For each gene (row), count the samples whose entry is non-zero, then show the ten largest counts.
countNonZero <- function(geneRow) sum(geneRow != 0)
numMuts <- apply(mutMat, 1, countNonZero)
numMutsSorted <- sort(numMuts, decreasing=TRUE)
print(numMutsSorted[1:10])
## TP53 TTN MUC16 PIK3CA CSMD3 FLG LRP1B SYNE1 RYR2 PCLO
## 2750 2426 1456 1038 835 833 831 819 801 771
These TCGA samples contain several different types of tumors. Here, we will focus on the largest of these groups (“Primary Solid Tumors”). These are 8528 samples and are labeled “01” in the TCGA sample IDs. The sample type codes are: 01: Primary Solid Tumor; 02: Recurrent Solid Tumor; 03: Primary Blood Derived Cancer - Peripheral Blood; 04: Recurrent Blood Derived Cancer - Bone Marrow; 05: Additional - New Primary; 06: Metastatic.
# The sample type code is the 4th "-"-separated field of each column name.
barcodeFields <- strsplit(colnames(mutMat), "-", fixed=TRUE)
tumorTypeId <- sapply(barcodeFields, function(fields) fields[4])
print(table(tumorTypeId))
## tumorTypeId
## 01 02 03 05 06
## 8528 14 197 9 289
Let's remove everything but the “Primary Solid Tumors” (i.e. “01”).
# Keep only the columns whose sample type code is "01".
isPrimarySolidTumor <- tumorTypeId == "01"
mutMat_only01 <- mutMat[, isPrimarySolidTumor]
theIds <- colnames(mutMat_only01)
# Some samples were listed in multiple folders when the data was downloaded from TCGA (e.g. the KIRC and KIPAN folders). Keep the first occurrence of each ID.
isFirstOccurrence <- !duplicated(theIds)
mutMat_nodups <- mutMat_only01[, isFirstOccurrence]
# Shorten each column name to its 3rd "-"-separated field.
# NOTE(review): uniqueness was enforced on the full ID above, not on this shortened field - confirm the shortened IDs are still unique.
nodupIdFields <- strsplit(colnames(mutMat_nodups), "-", fixed=TRUE)
mutIds <- sapply(nodupIdFields, function(fields) fields[3])
colnames(mutMat_nodups) <- mutIds
# save(mutMat_nodups, file="/mnt/data_scratch/prediXcanProj/Results/rDatas/mutMat.RData")
Load the imputed drug sensitivity data.
# Restore the saved prediction objects into the workspace and label the cancer-type vector
# with the column names of the prediction matrix.
predictionsFile <- paste0(theRootDir, "dataOut/allDrugPredictions_mat.RData")
load(file=predictionsFile) # allDrugPredictions_mat, cancerTypesVec,
names(cancerTypesVec) <- colnames(allDrugPredictions_mat)
Make a plot for lapatinib across all cancer types
# Group the Lapatinib row of the prediction matrix by cancer type and draw one box per type.
lapatinibPredictions <- allDrugPredictions_mat["Lapatinib", ]
lapatinibTypes <- split(lapatinibPredictions, cancerTypesVec)
svg(paste0(theRootDir, "figures/lapatinibTypes.svg"), width=8, height=8)
boxplot(lapatinibTypes, las=2)
dev.off()
## png
## 2
Extract the 01a samples, i.e. tumor samples.
# Column names of the prediction matrix are "."-separated; the 4th field is the sample type/vial.
predictionIdFields <- strsplit(colnames(allDrugPredictions_mat), ".", fixed=TRUE)
predictionSampleType <- sapply(predictionIdFields, function(a) a[4])
all01ASamples <- colnames(allDrugPredictions_mat)[which(predictionSampleType == "01A")]
# Restrict predictions and cancer-type labels to the "01A" columns.
preds01a <- allDrugPredictions_mat[, all01ASamples]
cancerTypes01a <- cancerTypesVec[all01ASamples]
# Re-label both by the 3rd "."-separated field so they can be matched to the mutation matrix columns.
sampIds01a <- sapply(strsplit(all01ASamples, ".", fixed=TRUE), function(l) l[3])
names(cancerTypes01a) <- sampIds01a
colnames(preds01a) <- sampIds01a
# samples for which we have both predicted drug response and mutation calls
hasMutationCalls <- sampIds01a %in% mutIds
inPredAndMutData <- sampIds01a[hasMutationCalls]
Run the associations between all genes and drugs, for genes with a protein-coding mutation in at least 50 samples.
# Put cancer types, predictions and mutation calls in the same sample order (selected by sample ID).
# NOTE(review): selection is by name; if a sample ID occurs more than once, name indexing returns the first match each time - confirm IDs are unique.
cancerTypes01a_filt_ord <- cancerTypes01a[inPredAndMutData]
preds01a_filt_ord <- preds01a[, inPredAndMutData]
mutMat_nodups_ordFilt <- mutMat_nodups[, inPredAndMutData]
# Number of samples with a coding mutation, per gene (the matrix holds 0/1, so the row sum is a count).
commonMuts <- rowSums(mutMat_nodups_ordFilt)
# Keep genes mutated in at least 50 samples. drop=FALSE keeps a matrix even if only one gene qualifies.
commonlyMutated <- mutMat_nodups_ordFilt[which(commonMuts >= 50), , drop=FALSE]
# there is an entry for gene with an "Unknown" HUGO ID. Remove these.
# A logical mask is used instead of [-which(...), ]: if no row is called "Unknown", which() returns
# integer(0) and the negative index would silently remove EVERY row.
commonlyMutated <- commonlyMutated[rownames(commonlyMutated) != "Unknown", , drop=FALSE]
# For every drug (row of preds01a_filt_ord) and every commonly mutated gene, fit
#   predicted response ~ mutation status + cancer type
# and store the p-value (pValList) and the estimate (betaValList) of the mutation term.
# Both lists hold one unnamed numeric vector per drug, with one entry per gene.
pValList <- list()
betaValList <- list()
numGenes <- nrow(commonlyMutated)
# seq_len() instead of 1:nrow() so that zero rows means zero iterations.
for(i in seq_len(nrow(preds01a_filt_ord)))
{
  drugResponse <- preds01a_filt_ord[i, ]
  # Preallocate rather than growing the vectors one element at a time.
  pValList[[i]] <- rep(NA_real_, numGenes)
  betaValList[[i]] <- rep(NA_real_, numGenes)
  for(j in seq_len(numGenes))
  {
    mutStatus <- commonlyMutated[j, ]
    thecoefs <- coef(summary(lm(drugResponse ~ mutStatus + cancerTypes01a_filt_ord)))
    # Look the mutation term up by name, not by position: summary.lm() omits aliased
    # coefficients, in which case row 2 would be a cancer-type term. Leave NA in that case.
    if("mutStatus" %in% rownames(thecoefs))
    {
      pValList[[i]][j] <- thecoefs["mutStatus", 4]
      betaValList[[i]][j] <- thecoefs["mutStatus", 1]
    }
  }
}
Create the “cancer-type” corrected TCGA drug prediction data matrix. This can potentially be used by others in subsequent analysis. This will be Supplementary Table 9.
# Drugs x samples matrix of residuals from regressing each drug's predicted response on cancer type.
imputedDrugResponses_correctCantype <- matrix(NA_real_, nrow=nrow(preds01a_filt_ord), ncol=ncol(preds01a_filt_ord))
# seq_len() instead of 1:nrow() so that zero rows means zero iterations.
for(i in seq_len(nrow(preds01a_filt_ord)))
{
  # na.exclude pads the residuals with NA for any sample lm() had to drop, so the vector always
  # has one entry per column (identical to the default when there are no missing values).
  imputedDrugResponses_correctCantype[i, ] <- residuals(lm(preds01a_filt_ord[i,]~cancerTypes01a_filt_ord, na.action=na.exclude))
}
rownames(imputedDrugResponses_correctCantype) <- rownames(preds01a_filt_ord)
colnames(imputedDrugResponses_correctCantype) <- colnames(preds01a_filt_ord)
write.csv(imputedDrugResponses_correctCantype, paste0(theRootDir, "tables/imputedDrugResponses_correctCantype.csv"))
The total number of genes genome wide with at least one somatic protein coding change called in TCGA.
# commonMuts has one entry per row of the mutation matrix, and the rows were built from
# allMutatedGenes (every gene listed with ANY variant classification), so this is the size of
# that gene universe.
# NOTE(review): genes whose count is 0 are included; sum(commonMuts > 0) would count only genes
# with a protein-coding change in these samples - confirm which number is intended.
length(commonMuts) # [1] 27856
## [1] 27856
The number of genes which have a somatic protein coding change in at least 50 samples
# Use >= 50 so the reported count matches the "at least 50 samples" filter that defines commonlyMutated.
# NOTE(review): the recorded value (1673) was produced with "> 50"; re-run to refresh it.
print(sum(commonMuts >= 50))
## [1] 1673
# Get the adjusted p-value for each gene-drug combination, pull out the significant associations and create a supplementary table that lists these for "predictable" drugs.
# For each drug: name the p-value / beta vectors by gene, Benjamini-Hochberg adjust the p-values
# across genes, and keep those with an adjusted p-value below 0.05.
geneNames <- rownames(commonlyMutated)
drugNames <- rownames(preds01a_filt_ord)
sigPs <- list()
pAdjListCantype <- list()
# seq_along() instead of 1:length() so that an empty list means zero iterations.
for(i in seq_along(pValList))
{
  names(pValList[[i]]) <- geneNames
  names(betaValList[[i]]) <- geneNames
  padj <- p.adjust(pValList[[i]], method="BH")
  # which() drops NA comparisons, so missing p-values never appear among the "significant" hits.
  sigPs[[i]] <- padj[which(padj < 0.05)]
  pAdjListCantype[[i]] <- padj
}
names(sigPs) <- drugNames
names(pValList) <- drugNames
names(betaValList) <- drugNames
names(pAdjListCantype) <- drugNames
Print the top associations
# Flatten all drug/gene p-values (names become "<drug>.<gene>") and show the 30 smallest.
allPValsSorted <- sort(unlist(pValList))
print(allPValsSorted[1:30])
## Nutlin.3a.TP53 PD.0332991.RB1
## 2.616094e-77 5.297248e-40
## PD.0325901.TP53 Methotrexate.GTF2I
## 9.695259e-25 1.788312e-24
## JW.7.52.1.IDH1 RDEA119.TP53
## 3.210826e-24 1.019750e-23
## AS601245.GTF2I X17.AAG.TP53
## 4.656693e-22 6.196315e-21
## AZD6482.GATA3 Z.LLNle.CHO.IDH1
## 3.820064e-19 4.770221e-18
## CGP.082996.NFE2L2 Shikonin.KEAP1
## 1.040903e-17 2.843464e-17
## Metformin.GTF2I CHIR.99021.TP53
## 4.943619e-17 5.943395e-17
## GNF.2.NFE2L2 SL.0101.1.GTF2I
## 8.439333e-17 4.869669e-16
## AKT.inhibitor.VIII.PIK3CA Erlotinib.NFE2L2
## 1.119735e-15 2.187966e-15
## AZ628.RB1 PD.0325901.RB1
## 3.183476e-15 5.508564e-15
## Bortezomib.IDH1 Rapamycin.IDH1
## 6.050093e-15 6.955193e-15
## ATRA.GTF2I Bicalutamide.GTF2I
## 1.119310e-14 3.739457e-14
## SB590885.TP53 BMS.708163.RB1
## 4.420161e-14 6.166012e-14
## CGP.60474.IDH1 AZD6244.TP53
## 7.492144e-14 8.907780e-14
## AZ628.TP53 PF.562271.RB1
## 9.917278e-14 1.982395e-13
Write all significant associations out to a supplementary table. We are only considering results for “predictable” drugs, i.e. drugs with a Spearman correlation of > 0.3 in cross-validation in the GDSC cancer cell lines on which these models were initially fit.
# Names of the drugs treated as "predictable" in the rest of the analysis.
predictableDrugs <- c(
  "ABT.263", "ABT.888", "AG.014699", "AICAR", "ATRA", "Axitinib", "AZ628", "AZD.0530",
  "AZD.2281", "AZD6244", "AZD6482", "AZD7762", "BAY.61.3606", "BIBW2992", "Bicalutamide",
  "BI.D1870", "Bleomycin", "BMS.536924", "BMS.754807", "Bortezomib", "Bosutinib", "BX.795",
  "Camptothecin", "CEP.701", "CGP.082996", "CHIR.99021", "CI.1040", "Cisplatin",
  "Cytarabine", "Dasatinib", "DMOG", "Docetaxel", "Elesclomol", "Erlotinib", "FH535",
  "FTI.277", "Gefitinib", "Gemcitabine", "IPA.3", "Lapatinib", "Methotrexate", "MG.132",
  "Midostaurin", "Mitomycin.C", "Nilotinib", "Nutlin.3a", "NVP.BEZ235", "NVP.TAE684",
  "Obatoclax.Mesylate", "PAC.1", "PD.0325901", "PD.0332991", "PD.173074", "PLX4720",
  "RDEA119", "SB590885", "Sunitinib", "Temsirolimus", "Thapsigargin", "Tipifarnib",
  "TW.37", "Vinblastine", "Vorinostat", "VX.702", "WH.4.023", "WO2009093972", "WZ.1.84",
  "X17.AAG", "X681640", "XMD8.85", "ZM.447439"
)
print(length(predictableDrugs))
## [1] 71
# We only want to report the p-values for the "predictable" drugs....
# Build one table (rows named "<drug>.<gene>", sorted by p-value) with the raw p-value, the
# per-drug BH FDR, the effect size, and the FDR additionally corrected for the number of models.
numPredictableDrugs <- length(predictableDrugs) # number of models corrected for (71 for the list above)
pValList_predictable <- unlist(pValList[predictableDrugs])
ord <- order(pValList_predictable)
pValList_predictable_ord <- pValList_predictable[ord]
# The beta and FDR vectors flatten in the same drug/gene order, so the same permutation applies.
betaValList_predictable <- unlist(betaValList[predictableDrugs])[ord]
pAdjListCantype_predictable <- unlist(pAdjListCantype[predictableDrugs])[ord]
# adjust the FDRs for the number of models and cap this at 1. This is equivalent to a Bonferroni correction for the number of models.
pAdjListCantype_predictable_forNumModels <- pmin(pAdjListCantype_predictable*numPredictableDrugs, 1)
outTab <- cbind(pValList_predictable_ord, pAdjListCantype_predictable, betaValList_predictable, pAdjListCantype_predictable_forNumModels)
colnames(outTab) <- c("P-value", "FDR", "Effect Size (beta)", paste0("FDR Corrected for ", numPredictableDrugs, " Models"))
write.csv(outTab, paste0(theRootDir, "tables/allResults_control_for_tissue.csv"))
# Number of associations that remain significant after correcting for the number of models.
sum(outTab[,4] < 0.05) # [1] 142
## [1] 142
Make figures for Nutlin-3a and TP53, for PD.0332991 and RB1, and for Erlotinib and KRAS.
# Histogram of -log10 p-values for every gene tested against Nutlin-3a; the red line marks TP53.
nutlinNegLogP <- -log10(pValList[["Nutlin.3a"]])
svg(paste0(theRootDir, "figures/nutlin3_p53.svg"), width=3, height=3)
hist(nutlinNegLogP, breaks=100, col="#8dd3c7", main="Nutlin-3a", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=nutlinNegLogP["TP53"], col="red")
dev.off()
## png
## 2
# Histogram of -log10 p-values for every gene tested against PD-0332991; the red line marks RB1.
pd0332991NegLogP <- -log10(pValList[["PD.0332991"]])
svg(paste0(theRootDir, "figures/PD0332991_rb1.svg"), width=3, height=3)
hist(pd0332991NegLogP, breaks=100, col="#8dd3c7", main="PD-0332991", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=pd0332991NegLogP["RB1"], col="red")
dev.off()
## png
## 2
# Histogram of -log10 p-values for every gene tested against Erlotinib; the red line marks KRAS.
erlotinibNegLogP <- -log10(pValList[["Erlotinib"]])
svg(paste0(theRootDir, "figures/erlotinib_kras.svg"), width=3, height=3)
hist(erlotinibNegLogP, breaks=100, col="#8dd3c7", main="Erlotinib", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=erlotinibNegLogP["KRAS"], col="red")
dev.off()
## png
## 2
Now re-do the gene-drug association analysis, but control for “general levels of drug sensitivity (GLDS)” instead of for cancer type. This algorithm is described in Geeleher et al., Genome Biology (2016). First, I need to identify a set of negative control (i.e. unrelated) drugs for each drug. Then I need to calculate the principal components of the imputed drug response values for these negative control drugs.
# For each drug, build the principal components of its "negative control" drugs:
# drugs from a different category, minus the drugs whose predictions are most correlated with it.
# controlPcsList[[j]] holds the PC score matrix (samples x PCs) for the j-th drug.
drugRelatedness <- read.csv(paste0(theRootDir, "dataIn/categorys.csv"), as.is=TRUE)
theseDrugNames <- rownames(preds01a_filt_ord)
# Strip the "_IC_50" suffix. sub() always returns exactly one value per row, whereas
# unlist(strsplit()) changes length for empty strings or names containing the pattern twice.
drugRelatedness[, "theseDrugNames"] <- sub("_IC_50", "", drugRelatedness[, "DrugNamesOtherFile"], fixed=TRUE)
pairCor <- cor(t(preds01a_filt_ord), method="spearman")
predsBySample <- t(preds01a_filt_ord) # samples x drugs; hoisted because it does not change in the loop
numCorrelatedToExclude <- 20 # the original slice 118:137 covered 20 positions
controlPcsList <- list()
for(j in seq_len(nrow(preds01a_filt_ord)))
{
  thisDrug <- rownames(preds01a_filt_ord)[j]
  categoryThisDrug <- drugRelatedness[which(drugRelatedness[, "theseDrugNames"] == thisDrug), "Drug.Category"]
  negControlDrugs <- na.omit(drugRelatedness[!drugRelatedness[, "Drug.Category"] %in% categoryThisDrug, "theseDrugNames"])
  # also remove very correlated drugs...
  # The previous names(rank(x))[118:137] did NOT select by correlation: rank() keeps the names in
  # their original positions, so it simply returned the drugs stored at positions 118-137.
  # Here the other drugs are sorted by absolute correlation and the top ones are taken, which also
  # removes the dependence on there being exactly 138 drugs.
  absCorWithOthers <- abs(pairCor[, j])[-j]
  mostCorrelatedFirst <- names(sort(absCorWithOthers, decreasing=TRUE))
  pairwiseCorNear <- mostCorrelatedFirst[seq_len(min(numCorrelatedToExclude, length(mostCorrelatedFirst)))]
  # remove very highly correlated drugs from "negative controls"; the drug itself is removed too,
  # so it can never act as its own control when it has no category entry.
  negControlDrugs <- setdiff(negControlDrugs, c(thisDrug, pairwiseCorNear))
  controlPCsAll <- prcomp(predsBySample[, negControlDrugs])$x
  controlPcsList[[j]] <- controlPCsAll
}
######################################################################################################################################################################
Run the analysis and control for GLDS, for each drug, control for the first 50 principal components of the negative control drugs.
# For every drug and every commonly mutated gene, fit
#   predicted response ~ mutation status + leading principal components of the control drugs
# and store the p-value and estimate of the mutation term.
# Both lists hold one unnamed numeric vector per drug, with one entry per gene.
pValList_glds_only <- list()
betaValList_glds_only <- list()
numGenes <- nrow(commonlyMutated)
maxControlPcs <- 50 # number of control PCs adjusted for
# seq_len() instead of 1:nrow() so that zero rows means zero iterations.
for(i in seq_len(nrow(preds01a_filt_ord)))
{
  drugResponse <- preds01a_filt_ord[i, ]
  # Take the first 50 PCs, or all of them when fewer are available (the fixed 1:50 would then
  # fail with a subscript error). Extracted once per drug rather than once per gene.
  numPcsHere <- min(maxControlPcs, ncol(controlPcsList[[i]]))
  controlPcs <- controlPcsList[[i]][, seq_len(numPcsHere), drop=FALSE]
  # Preallocate rather than growing the vectors one element at a time.
  pValList_glds_only[[i]] <- rep(NA_real_, numGenes)
  betaValList_glds_only[[i]] <- rep(NA_real_, numGenes)
  for(j in seq_len(numGenes))
  {
    mutStatus <- commonlyMutated[j, ]
    thecoefs <- coef(summary(lm(drugResponse ~ mutStatus + controlPcs)))
    # Look the mutation term up by name, not by position: summary.lm() omits aliased
    # coefficients, in which case row 2 would belong to a principal component. Leave NA then.
    if("mutStatus" %in% rownames(thecoefs))
    {
      pValList_glds_only[[i]][j] <- thecoefs["mutStatus", 4]
      betaValList_glds_only[[i]][j] <- thecoefs["mutStatus", 1]
    }
  }
}
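Create the “GLDS” corrected TCGA drug prediction data matrix, i.e. the residuals after regressing out the first 50 principal components of each drug's negative control drugs. This can potentially be used by others in subsequent analysis. This will be Supplementary Table 10.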
# Drugs x samples matrix of residuals from regressing each drug's predicted response on the
# leading principal components of its control drugs.
imputedDrugResponses_correctGlds <- matrix(NA_real_, nrow=nrow(preds01a_filt_ord), ncol=ncol(preds01a_filt_ord))
# seq_len() instead of 1:nrow() so that zero rows means zero iterations.
for(i in seq_len(nrow(preds01a_filt_ord)))
{
  # First 50 PCs, or all of them when fewer exist (a fixed 1:50 would fail in that case).
  controlPcs <- controlPcsList[[i]][, seq_len(min(50, ncol(controlPcsList[[i]]))), drop=FALSE]
  # na.exclude pads the residuals with NA for any sample lm() had to drop, so the vector always
  # has one entry per column (identical to the default when there are no missing values).
  imputedDrugResponses_correctGlds[i, ] <- residuals(lm(preds01a_filt_ord[i,]~controlPcs, na.action=na.exclude))
}
rownames(imputedDrugResponses_correctGlds) <- rownames(preds01a_filt_ord)
colnames(imputedDrugResponses_correctGlds) <- colnames(preds01a_filt_ord)
write.csv(imputedDrugResponses_correctGlds, paste0(theRootDir, "tables/imputedDrugResponses_correctGlds.csv"))
Calculate false discovery rates for these samples. Then output a table of the top results, but only include the results for the “predictable” drugs, i.e. those with a spearman's correlation of > 0.3 in the cell lines on which these data were fit.
# For each drug: name the GLDS p-value / beta vectors by gene, Benjamini-Hochberg adjust the
# p-values across genes, and keep those below the significance cut-off.
geneNames <- rownames(commonlyMutated)
drugNames <- rownames(preds01a_filt_ord)
sigPs_glds_only <- list()
pAdjListGlds <- list()
# seq_along() instead of 1:length() so that an empty list means zero iterations.
for(i in seq_along(pValList_glds_only))
{
  names(pValList_glds_only[[i]]) <- geneNames
  names(betaValList_glds_only[[i]]) <- geneNames
  padj <- p.adjust(pValList_glds_only[[i]], method="BH")
  # which() drops NA comparisons, so missing p-values never appear among the "significant" hits.
  # NOTE(review): the cut-off here is 0.0005, whereas the cancer-type analysis uses 0.05 - confirm this is intentional.
  sigPs_glds_only[[i]] <- padj[which(padj < 0.0005)]
  pAdjListGlds[[i]] <- padj
}
names(sigPs_glds_only) <- drugNames
names(pValList_glds_only) <- drugNames
names(betaValList_glds_only) <- drugNames
names(pAdjListGlds) <- drugNames
# Show the 30 smallest GLDS p-values over all drug/gene pairs (names are "<drug>.<gene>").
print(sort(unlist(pValList_glds_only))[1:30])
## Nutlin.3a.TP53 PLX4720.BRAF RO.3306.IDH1
## 1.586798e-58 3.158528e-38 7.194430e-31
## Thapsigargin.KRAS Erlotinib.KRAS CMK.IDH1
## 7.274147e-31 1.912561e-30 5.407619e-30
## Mitomycin.C.IDH1 S.Trityl.L.cysteine.IDH1 S.Trityl.L.cysteine.KRAS
## 1.336361e-29 3.258877e-28 2.779767e-27
## Dasatinib.IDH1 Sunitinib.IDH1 GNF.2.IDH1
## 1.031095e-26 3.196810e-26 4.326400e-26
## TW.37.IDH1 Metformin.ZFPM1 CHIR.99021.BRAF
## 5.107202e-25 5.688284e-25 3.144414e-24
## JNK.9L.BRAF CCT018159.KEAP1 Cytarabine.BRAF
## 3.244100e-24 2.052525e-22 5.658124e-22
## GW843682X.BRAF EHT.1864.KRAS Lapatinib.GNAQ
## 8.389303e-22 9.209153e-22 1.209930e-21
## EHT.1864.BRAF AZD8055.BRAF OSI.906..
## 1.629043e-21 2.937785e-21 3.395010e-21
## PF.4708671.VHL SB590885.TP53 Mitomycin.C.NFE2L2
## 3.104441e-20 3.193463e-20 6.051618e-20
## WZ.1.84.BAP1 AZ628.BRAF CCT018159.STK11
## 7.079341e-20 9.498871e-20 1.125190e-19
We only want to report the adjusted p-values for the “predictable” drugs…
# Build one table (rows named "<drug>.<gene>", sorted by p-value) with the raw GLDS p-value, the
# per-drug BH FDR, the effect size, and the FDR additionally corrected for the number of models.
numPredictableDrugs <- length(predictableDrugs) # number of models corrected for (71 for the current list)
pValList_glds_only_predictable <- unlist(pValList_glds_only[predictableDrugs])
ord <- order(pValList_glds_only_predictable)
pValList_glds_only_predictable_ord <- pValList_glds_only_predictable[ord]
# The beta and FDR vectors flatten in the same drug/gene order, so the same permutation applies.
betaValList_glds_only_predictable <- unlist(betaValList_glds_only[predictableDrugs])[ord]
pAdjListGlds_predictable <- unlist(pAdjListGlds[predictableDrugs])[ord]
# adjust the FDRs for the number of models and cap this at 1. This is equivalent to a Bonferroni correction for the number of models.
pAdjListGlds_predictable_forNumModels <- pmin(pAdjListGlds_predictable*numPredictableDrugs, 1)
outTab_glds <- cbind(pValList_glds_only_predictable_ord, pAdjListGlds_predictable, betaValList_glds_only_predictable, pAdjListGlds_predictable_forNumModels)
colnames(outTab_glds) <- c("P-value", "FDR", "Effect Size (beta)", "FDR corrected for number of models")
write.csv(outTab_glds, paste0(theRootDir, "tables/allResults_glds.csv"))
# Number of associations that remain significant after correcting for the number of models.
sum(outTab_glds[,4] < 0.05) # [1] 263
## [1] 263
Make some figures for the top results.
# Bind the per-drug GLDS p-value vectors side by side, one column per element of pValList_glds_only.
# NOTE(review): gldspValMat is not referenced again in the visible part of this file - confirm it is still needed.
gldspValMat <- do.call(cbind, pValList_glds_only)
Make GLDS figures for Nutlin-3a and TP53, for PD.0332991 and RB1, and for Erlotinib and KRAS.
# Histogram of -log10 GLDS p-values for every gene tested against Nutlin-3a; the red line marks TP53.
nutlinGldsNegLogP <- -log10(pValList_glds_only[["Nutlin.3a"]])
svg(paste0(theRootDir, "figures/nutlin3_p53_glds.svg"), width=3, height=3)
hist(nutlinGldsNegLogP, breaks=100, col="#8dd3c7", main="Nutlin-3a", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=nutlinGldsNegLogP["TP53"], col="red")
dev.off()
## png
## 2
# Histogram of -log10 GLDS p-values for every gene tested against PD-0332991; the red line marks RB1.
pd0332991GldsNegLogP <- -log10(pValList_glds_only[["PD.0332991"]])
svg(paste0(theRootDir, "figures/PD0332991_rb1_glds.svg"), width=3, height=3)
hist(pd0332991GldsNegLogP, breaks=100, col="#8dd3c7", main="PD-0332991", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=pd0332991GldsNegLogP["RB1"], col="red")
dev.off()
## png
## 2
# Histogram of -log10 GLDS p-values for every gene tested against Erlotinib; the red line marks KRAS.
erlotinibGldsNegLogP <- -log10(pValList_glds_only[["Erlotinib"]])
svg(paste0(theRootDir, "figures/erlotinib_kras_glds.svg"), width=3, height=3)
hist(erlotinibGldsNegLogP, breaks=100, col="#8dd3c7", main="Erlotinib", las=1, xlab=expression("-Log"[10]*"P-value"), cex.axis=0.75)
abline(v=erlotinibGldsNegLogP["KRAS"], col="red")
dev.off()
## png
## 2
Print some of the results for mutations that we'd expect for drugs which are “predictable”. And for drugs which are not….
# Spot checks: GLDS-controlled p-value for selected drug/gene pairs.
# Each lookup is list[[drug]][gene]; a result printed as <NA> / NA means the gene name is not
# present in that drug's p-value vector, i.e. the gene was not among the tested genes.
# The "##" lines are the recorded output of the preceding call.
print(pValList_glds_only[["PD.0332991"]]["RB1"])
## RB1
## 4.328493e-17
print(pValList_glds_only[["Nutlin.3a"]]["TP53"])
## TP53
## 1.586798e-58
print(pValList_glds_only[["PLX4720"]]["BRAF"])
## BRAF
## 3.158528e-38
print(pValList_glds_only[["SB590885"]]["BRAF"])
## BRAF
## 0.0007423603
print(pValList_glds_only[["PD.0325901"]]["BRAF"])
## BRAF
## 4.510796e-05
print(pValList_glds_only[["Gefitinib"]]["EGFR"])
## EGFR
## 0.008663456
print(pValList_glds_only[["Erlotinib"]]["KRAS"])
## KRAS
## 1.912561e-30
print(pValList_glds_only[["PD.0332991"]]["CDKN2A"])
## CDKN2A
## 0.6810552
print(pValList_glds_only[["AZD.2281"]]["BRCA2"]) # There are potential problems with screening of PARP inhibitors in GDSC, see Heitmann et al (Oral Oncology 2014). Drugs likely not screened for long enough.
## BRCA2
## 0.2885234
print(pValList_glds_only[["AZD.2281"]]["BRCA1"])
## BRCA1
## 0.9467676
print(pValList_glds_only[["ATRA"]]["RARA"]) # not enough mutations
## <NA>
## NA
print(pValList_glds_only[["Sunitinib"]]["PDGFRB"])
## PDGFRB
## 0.4169226
print(pValList_glds_only[["Sunitinib"]]["FLT3"]) # not enough mutations
## <NA>
## NA
print(pValList_glds_only[["Erlotinib"]]["EGFR"])
## EGFR
## 0.143658
print(pValList_glds_only[["Rapamycin"]]["AKT1"]) # Rapamycin is not in the "predictable" drugs list; the NA itself arises because AKT1 is not among the tested genes (this list holds every drug, not only the predictable ones)
## <NA>
## NA
print(pValList_glds_only[["Imatinib"]]["KIT"])
## KIT
## 0.1999907
print(pValList_glds_only[["Nilotinib"]]["KIT"])
## KIT
## 0.2552595
print(pValList_glds_only[["Sunitinib"]]["KIT"])
## KIT
## 0.2646219
Print the same associations when controlling for cancer type instead of GLDS. Arguably the GLDS results are slightly better.
# Spot checks: the same drug/gene pairs, using the cancer-type-controlled p-values (pValList).
# Each lookup is list[[drug]][gene]; a result printed as <NA> / NA means the gene name is not
# present in that drug's p-value vector, i.e. the gene was not among the tested genes.
# The "##" lines are the recorded output of the preceding call.
print(pValList[["PD.0332991"]]["RB1"])
## RB1
## 5.297248e-40
print(pValList[["Nutlin.3a"]]["TP53"])
## TP53
## 2.616094e-77
print(pValList[["PLX4720"]]["BRAF"])
## BRAF
## 0.05468207
print(pValList[["SB590885"]]["BRAF"])
## BRAF
## 0.09555544
print(pValList[["PD.0325901"]]["BRAF"])
## BRAF
## 1.10102e-08
print(pValList[["Gefitinib"]]["EGFR"])
## EGFR
## 0.2223254
print(pValList[["Erlotinib"]]["KRAS"])
## KRAS
## 0.0007235989
print(pValList[["PD.0332991"]]["CDKN2A"])
## CDKN2A
## 0.7772119
print(pValList[["AZD.2281"]]["BRCA2"])
## BRCA2
## 0.7911218
print(pValList[["AZD.2281"]]["BRCA1"])
## BRCA1
## 0.7460503
print(pValList[["ATRA"]]["RARA"]) # not enough mutations
## <NA>
## NA
print(pValList[["Sunitinib"]]["PDGFRB"])
## PDGFRB
## 0.8502205
print(pValList[["Sunitinib"]]["FLT3"]) # not enough mutations
## <NA>
## NA
print(pValList[["Erlotinib"]]["EGFR"])
## EGFR
## 0.4098387
print(pValList[["Rapamycin"]]["AKT1"]) # Rapamycin is not in the "predictable" drugs list; the NA itself arises because AKT1 is not among the tested genes (this list holds every drug, not only the predictable ones)
## <NA>
## NA
print(pValList[["Imatinib"]]["KIT"])
## KIT
## 3.989283e-09
print(pValList[["Sunitinib"]]["KIT"])
## KIT
## 0.03297061
print(pValList[["Nilotinib"]]["KIT"])
## KIT
## 4.164036e-06
# NOTE(review): the next call reads pValList_glds_only, not pValList, so it repeats the GLDS
# value for Sunitinib/KIT inside the cancer-type section - looks like a copy-paste leftover; confirm.
print(pValList_glds_only[["Sunitinib"]]["KIT"])
## KIT
## 0.2646219