This script applies the mSigAct signature presence test (Ng et al. 2017) to evaluate the hypothesis that the mutational signature identified in OSCC 62074759 (the A^nT signature) is present in candidate tumors. Candidate tumors were previously identified as tumors showing strong enrichment for mutations with the characteristics of A^nT, as described in Supplementary Data 1 of this manuscript.
We start with the following input data:
candidates: the tumors identified using the script in Supplementary Data 1 (this is the ‘results’ object from that script).
PCAWG7 attributions: a folder containing 4 signature-attribution files, one for each of the 4 major groups of tumors in the PCAWG7 analysis of mutational signatures (Alexandrov et al., 2018; downloaded from https://www.synapse.org/#!Synapse:syn11804065 on June 24th, 2018).
First, for each of the candidates, extract the PCAWG7 signature assignments, which will be used later in the analysis.
library(stringr)
## read the candidate tumors (tab-separated); the part of each row name
## before "___" is used as the sample identifier
candidates <- read.delim("candidatesW_AnT.txt", as.is = TRUE)
## remove 62074759 (dropped by position: assumes it is the last row)
candidates <- candidates[-nrow(candidates), ]
candidates$sample <- str_split_fixed(rownames(candidates), "___", 2)[, 1]
## rename the tumor types in the candidates table because of different annotations
## (row names of tumorTypes are the patterns, `annotation` the replacements)
tumorTypes <- read.delim("tumorTypes.txt", as.is = TRUE, row.names = 1)
for (k in seq_len(nrow(tumorTypes))) {
  candidates$sample <- gsub(
    rownames(tumorTypes)[k],
    tumorTypes$annotation[k],
    candidates$sample
  )
}
## specify which file we should look in for the attributions
attribution_source <- c(
  WES_Other = "nonPCAWG_WES",
  WES_TCGA = "TCGA_WES",
  WGS_Other = "nonPCAWG_WGS",
  WGS_ICGC = "PCAWG_WGS"
)
for (data_type in names(attribution_source)) {
  candidates$attributions[candidates$dataType == data_type] <-
    attribution_source[[data_type]]
}
## gather the signature attributions for the candidate samples:
## read every file in the folder and keep the rows whose
## "<Cancer.Types>::<Sample.Names>" key is one of the candidate samples
files <- list.files("PCAWG7 attributions", full.names = TRUE)
attributions <- Reduce(
  function(collected, path) {
    tab <- read.csv(path, as.is = TRUE)
    keys <- paste0(tab$Cancer.Types, "::", tab$Sample.Names)
    rbind(collected, tab[keys %in% candidates$sample, ])
  },
  files,
  NULL
)
source("src/mSigTools.v0.13.R")
## Loading required package: SnowballC
source("src/mSigAct.v0.10.R")
##
## Attaching package: 'sets'
## The following object is masked from 'package:stringr':
##
## %>%
## load signatures
## get.signatures() is not defined in this script (presumably it comes from
## the sourced mSig* files); its `genome` and `exome` elements are kept as the
## WGS and WES signature sets and the temporary container is then discarded
cosmic.sigs <- get.signatures(
  signature.file = "PCAWG7and62074759_96.tsv",
  exome.op = .h19.96.sureselect.v6.op
)
cosmic.wgs <- cosmic.sigs$genome
cosmic.wes <- cosmic.sigs$exome
rm(cosmic.sigs)
# Downstream mSigAct requires that the elements of each signature (each
# column) sum to exactly 1, so every column is divided by its column sum.
# TODO: eventually move this code into mSigAct, or change the check there to
# an all.equal()-style comparison with a tolerance.
# The sweep is applied three times in a row; a single pass apparently does not
# always leave the column sums exactly equal to 1, presumably because of
# floating-point rounding.
for (i in 1:3) cosmic.wes <- sweep(cosmic.wes, MARGIN=2, colSums(cosmic.wes), '/')
# Abort unless every exome signature now sums to exactly 1.
stopifnot(colSums(cosmic.wes) == 1)
# Same normalisation and exactness check for the genome signatures.
for (i in 1:3) cosmic.wgs <- sweep(cosmic.wgs, MARGIN=2, colSums(cosmic.wgs), '/')
stopifnot(colSums(cosmic.wgs) == 1)
## load the catalogs of the candidate tumors (tab-separated mutation spectra,
## one table for exomes and one for genomes)
genomes <- read.delim("catalogs/spectrum_counts_genomes.txt", as.is = TRUE)
exomes <- read.delim("catalogs/spectrum_counts_exomes.txt", as.is = TRUE)
Define a function to run mSigAct on a single sample, using the signatures attributed to that sample in the PCAWG analysis together with AnT.
## Order signature names by their numeric part.
## `base` is the prefix (used as a regular expression) removed from every
## name; the letters a, b, c and d are then removed as well and the remainder
## is compared as a number. Names with the same number keep their input order.
sortSigs <- function(sigs, base = "SBS") {
  without_prefix <- gsub(base, "", sigs)
  numeric_part <- gsub("[abcd]", "", without_prefix)
  sigs[order(as.numeric(numeric_part))]
}
## Run the mSigAct presence test for the AnT signature on one sample.
##
## Uses objects created earlier in this script: `attributions`, `candidates`,
## `cosmic.wgs` / `cosmic.wes`, `genomes` / `exomes`, plus `sortSigs()`,
## `process.one.group()` and `obj.fun.nbinom.maxlh`.
##
## smp:     sample name as it appears in attributions$Sample.Names
## returns: a one-row matrix (row name = smp) whose first column `pval` holds
##          analysis$pval, followed by one column per row of analysis$exposure
run.mSigAct.per.smp<-function(smp){
  ## signatures with a non-zero PCAWG attribution for this sample
  ## (signature columns start at column 4 of `attributions`)
  attributed <- attributions[attributions$Sample.Names == smp,
                             4:ncol(attributions)]
  if (nrow(attributed) == 0) {
    stop("sample '", smp, "' not found in attributions", call. = FALSE)
  }
  sigs <- colnames(attributed)[!attributed[1, ] == 0]
  ## SBS1 and SBS5 are always included, regardless of previous assignments
  ## (the original note lists SBS1, SBS5 and SBS40 as present in all tumors,
  ## but only SBS1 and SBS5 are added here)
  if (!"SBS1" %in% sigs) { sigs <- c(sigs, "SBS1") }
  if (!"SBS5" %in% sigs) { sigs <- c(sigs, "SBS5") }
  sigs <- sortSigs(sigs)

  ## depending on data type, use WGS or WES signatures.
  ## candidates$sample holds "<tumor type>::<sample name>" keys, so the sample
  ## is looked up by its exact name (not as a regular expression / substring),
  ## and the full column name `dataType` is used instead of relying on `$`
  ## partial matching.
  hits <- which(endsWith(candidates$sample, paste0("::", smp)))
  dataType <- unique(sub("_.*$", "", candidates$dataType[hits]))
  if (length(dataType) != 1) {
    stop("expected exactly one data type for sample '", smp, "', found ",
         length(dataType), call. = FALSE)
  }
  if (dataType == "WGS") {
    universe <- cosmic.wgs
    catalog <- genomes
  } else {
    universe <- cosmic.wes
    catalog <- exomes
  }
  subverse <- universe[, c(sigs, "AnT")]

  ## ensure that catalog and signatures are in the same order
  rownames(catalog) <- paste0(catalog$Before, catalog$Ref,
                              catalog$After, catalog$Var)
  subverse <- subverse[rownames(catalog), ]

  ## locate the sample's column in the catalog: prefer the exact syntactic
  ## name, otherwise fall back to a literal (non-regex) substring search with
  ## "-" replaced by "." as in the column names
  sample_col <- which(colnames(catalog) == make.names(smp))
  if (length(sample_col) != 1) {
    sample_col <- grep(gsub("-", ".", smp, fixed = TRUE),
                       colnames(catalog), fixed = TRUE)
  }
  if (length(sample_col) != 1) {
    stop("expected exactly one catalog column for sample '", smp, "', found ",
         length(sample_col), call. = FALSE)
  }
  input <- as.matrix(catalog[, sample_col])
  colnames(input) <- smp
  rownames(input) <- rownames(subverse)

  analysis <- process.one.group(input,
                                subverse,
                                target.sig.name = "AnT",
                                path.root = paste0("mSigAct_output/", smp),
                                obj.fun = obj.fun.nbinom.maxlh,
                                nbinom.size = 10, ## = dispersion parameter
                                mc.cores = 1)     ## = number of cores
  # results: p-value first, then the exposures, as a single row named smp
  df <- t(rbind(pval = analysis$pval, analysis$exposure))
  rownames(df) <- smp
  return(df)
}
## make a df to save the attributions in: start from the PCAWG attributions,
## add an AnT exposure column and a column for the mSigAct p-value
mSigAct_result<-attributions
mSigAct_result$AnT<-NA
mSigAct_result$mSigAct_pval<-0
for (smp in attributions$Sample.Names) {
  df <- as.data.frame(run.mSigAct.per.smp(smp))
  rows <- mSigAct_result$Sample.Names == smp
  mSigAct_result$mSigAct_pval[rows] <- df$pval
  ## write the re-estimated exposures back by column NAME; a positional
  ## assignment would silently put values in the wrong columns whenever the
  ## signature order returned for a sample differs from the column order here
  sig_names <- setdiff(colnames(df), "pval")
  unknown <- setdiff(sig_names, colnames(mSigAct_result))
  if (length(unknown) > 0) {
    stop("no column in mSigAct_result for signature(s): ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }
  mSigAct_result[rows, sig_names] <- df[1, sig_names]
}
## remove signatures that are not attributed to any sample:
## keep column 2 (the sample names) plus every column after the first three
## whose column sum is positive
column_totals <- colSums(mSigAct_result[, -c(1:3)])
kept_columns <- c(2, which(column_totals > 0) + 3)
mSigAct_result <- mSigAct_result[kept_columns]
## perform multiple testing correction (Benjamini-Hochberg)
mSigAct_result$mSigAct_qval <- p.adjust(
  mSigAct_result$mSigAct_pval,
  method = "BH"
)
## format to print output table to html
## exposure columns are everything between the sample names (column 1) and
## the last two columns (p-value and q-value)
exposure_columns <- 2:(ncol(mSigAct_result) - 2)
for (col_idx in exposure_columns) {
  as_number <- as.numeric(mSigAct_result[, col_idx])
  mSigAct_result[, col_idx] <- as.numeric(
    format(as_number, digits = 0, scientific = FALSE)
  )
}
## p- and q-values are shown with 3 significant digits
mSigAct_result$mSigAct_qval <- format(mSigAct_result$mSigAct_qval, digits = 3)
mSigAct_result$mSigAct_pval <- format(mSigAct_result$mSigAct_pval, digits = 3)
library(knitr)
## print everything except the second-to-last column (the uncorrected p-value)
pval_position <- ncol(mSigAct_result) - 1
kable(
  mSigAct_result[, -pval_position],
  row.names = FALSE,
  align = c("l", rep("c", 14))
)
| Sample.Names | SBS1 | SBS2 | SBS5 | SBS7a | SBS7b | SBS9 | SBS10a | SBS10b | SBS13 | SBS14 | SBS15 | SBS16 | SBS17a | SBS17b | SBS18 | SBS27 | SBS28 | SBS37 | SBS40 | SBS45 | SBS57 | AnT | mSigAct_qval |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BD121T | 117 | 0 | 145 | 0 | 0 | 0 | 1119 | 833 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 70 | 3.27e-03 |
| BD173T | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 377 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 846 | 6.48e-39 |
| BD182T | 0 | 0 | 349 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 879 | 0 | 0 | 0 | 0 | 877 | 1.77e-69 |
| BD223T | 0 | 0 | 160 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 80 | 182 | 2.40e-16 |
| sysucc-311T | 0 | 0 | 745 | 0 | 0 | 0 | 3913 | 7406 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1864 | 0 | 0 | 0 | 0 | 231 | 8.01e-04 |
| ESO-173 | 64 | 0 | 82 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 23 | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| HCC34T | 0 | 0 | 249 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.75e-01 |
| PCSI_0060_Pa_X | 33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 47 | 6.68e-07 |
| SKCM-JWCI-WGS-8-Tumor | 0 | 0 | 125 | 394 | 187 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9.27e-01 |
| T155 | 37 | 0 | 41 | 0 | 0 | 0 | 467 | 438 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.25e-01 |
| LP6005935-DNA_B03 | 899 | 0 | 11769 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5759 | 16897 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9577 | 3.63e-18 |
| 8069334 | 1153 | 0 | 3229 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 500 | 1357 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 862 | 3.01e-07 |
| 0047_CRUK_PC_0047_T1_DNA | 496 | 0 | 1904 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 951 | 1.85e-16 |
| SP22031 | 2490 | 0 | 4530 | 0 | 0 | 0 | 124275 | 80170 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 22766 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| SP16886 | 0 | 0 | 12505 | 0 | 0 | 0 | 434277 | 251355 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 151145 | 0 | 0 | 0 | 0 | 0 | 7.13e-01 |
| SP19295 | 4183 | 0 | 6759 | 0 | 0 | 0 | 127419 | 102190 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 17351 | 0 | 0 | 0 | 0 | 1965 | 7.91e-02 |
| SP17905 | 0 | 0 | 30073 | 0 | 0 | 0 | 1215032 | 478155 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 523625 | 185733 | 0 | 0 | 0 | 0 | 8.33e-01 |
| SP21400 | 9932 | 0 | 15411 | 0 | 0 | 0 | 256130 | 446142 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 65424 | 0 | 0 | 0 | 0 | 0 | 1.75e-01 |
| SP18946 | 0 | 0 | 16982 | 0 | 0 | 0 | 490500 | 366755 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 86576 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| SP80615 | 0 | 0 | 57839 | 0 | 0 | 0 | 1070130 | 795814 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 479938 | 0 | 0 | 0 | 0 | 22748 | 2.38e-02 |
| SP81494 | 2341 | 0 | 12182 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2613 | 0 | 0 | 0 | 2963 | 4521 | 0 | 2148 | 7.18e-04 |
| SP81711 | 3789 | 0 | 4065 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3537 | 0 | 0 | 0 | 4975 | 0 | 0 | 3908 | 1.08e-12 |
| SP80754 | 2305 | 0 | 2948 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2097 | 0 | 206 | 0 | 0 | 0 | 0 | 3653 | 6.48e-39 |
| SP111026 | 1481 | 0 | 8490 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9010 | 24066 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3742 | 2.77e-10 |
| SP111101 | 3450 | 0 | 3123 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1011 | 1607 | 0 | 0 | 0 | 0 | 5476 | 0 | 0 | 2245 | 4.04e-07 |
| SP92659 | 1802 | 0 | 6991 | 0 | 0 | 0 | 191629 | 55154 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26686 | 0 | 0 | 0 | 0 | 0 | 4.54e-01 |
| TCGA-AB-2824-03B-01W-0728-08 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 70 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2851-03B-01W-0728-08 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 58 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2867-03B-01W-0728-08 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 115 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AB-2868-03B-01W-0728-08 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 201 | 0 | 0 | 0 | 0 | 0 | 3 | 9.49e-02 |
| TCGA-FU-A3HZ-01A-11D-A20U-09 | 0 | 0 | 107 | 0 | 0 | 0 | 1418 | 1003 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 655 | 0 | 0 | 0 | 0 | 71 | 1.61e-03 |
| TCGA-AY-4071-01A-01W-1073-09 | 45 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 79 | 0 | 0 | 40 | 7.69e-05 |
| TCGA-AG-3892-01A-01W-1073-09 | 0 | 0 | 91 | 0 | 0 | 0 | 1932 | 1570 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-AG-3902-01A-01W-1073-09 | 42 | 0 | 41 | 0 | 0 | 20 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 65 | 1.58e-07 |
| TCGA-2H-A9GM-01A-11D-A37C-09 | 23 | 0 | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 21 | 43 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.38e-01 |
| TCGA-IG-A4QS-01A-11D-A27G-09 | 69 | 0 | 112 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 38 | 64 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | 2.08e-02 |
| TCGA-R6-A6L4-01A-11D-A31U-09 | 8 | 0 | 63 | 0 | 0 | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 17 | 44 | 19 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7.13e-01 |
| TCGA-BA-A4IG-01A-11D-A25Y-08 | 0 | 16 | 81 | 0 | 0 | 0 | 0 | 0 | 27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 44 | 1.51e-06 |
| TCGA-BR-6453-01A-11D-1800-08 | 49 | 0 | 81 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 65 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.00e+00 |
| TCGA-D7-A4Z0-01A-22D-A25D-08 | 37 | 0 | 62 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 97 | 169 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6.07e-01 |
| TCGA-G2-AA3B-01A-11D-A391-08 | 0 | 352 | 101 | 0 | 0 | 0 | 0 | 0 | 438 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 45 | 3.25e-04 |
| TCGA-GC-A6I3-01A-11D-A31L-08 | 12 | 71 | 105 | 0 | 0 | 0 | 0 | 0 | 90 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | 3.89e-03 |
| TCGA-A5-A0GP-01A-11W-A062-09 | 76 | 0 | 202 | 0 | 0 | 0 | 950 | 804 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 195 | 0 | 0 | 0 | 0 | 0 | 3.25e-01 |
| TCGA-AJ-A5DW-01A-11D-A27P-09 | 97 | 0 | 307 | 0 | 0 | 0 | 3352 | 1499 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 366 | 0 | 0 | 0 | 0 | 0 | 4.15e-01 |
| TCGA-AP-A1E0-01A-11D-A135-09 | 216 | 0 | 325 | 0 | 0 | 0 | 4978 | 2070 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 810 | 0 | 0 | 0 | 0 | 0 | 9.48e-01 |
| TCGA-AX-A1CE-01A-11D-A135-09 | 0 | 0 | 190 | 0 | 0 | 0 | 0 | 0 | 0 | 4221 | 15172 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.13e-01 |
| TCGA-BK-A6W3-01A-12D-A34Q-09 | 123 | 0 | 335 | 0 | 0 | 0 | 4032 | 1867 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 722 | 0 | 0 | 0 | 0 | 90 | 4.80e-02 |
| TCGA-DF-A2KV-01A-11D-A17W-09 | 38 | 0 | 74 | 0 | 0 | 0 | 1008 | 938 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 286 | 0 | 0 | 0 | 0 | 47 | 2.34e-02 |
| TCGA-E6-A1M0-01A-11D-A142-09 | 56 | 0 | 126 | 0 | 0 | 0 | 1913 | 778 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 296 | 0 | 0 | 0 | 0 | 0 | 4.55e-01 |
| TCGA-EO-A3AV-01A-12D-A19Y-09 | 0 | 0 | 398 | 0 | 0 | 0 | 4037 | 1899 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 990 | 0 | 0 | 0 | 0 | 0 | 1.94e-01 |
| TCGA-EO-A3AY-01A-12D-A19Y-09 | 0 | 0 | 333 | 0 | 0 | 0 | 3805 | 1489 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 686 | 0 | 0 | 0 | 0 | 0 | 1.13e-01 |
| TCGA-EY-A1GD-01A-11D-A13L-09 | 57 | 0 | 173 | 0 | 0 | 0 | 581 | 771 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 274 | 0 | 0 | 0 | 0 | 38 | 7.91e-02 |
| TCGA-EY-A1GI-01A-11D-A13L-09 | 101 | 0 | 421 | 0 | 0 | 0 | 4053 | 2481 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 818 | 0 | 0 | 0 | 0 | 0 | 2.07e-01 |
| TCGA-QF-A5YS-01A-11D-A31U-09 | 115 | 0 | 110 | 0 | 0 | 0 | 1152 | 1141 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 220 | 0 | 0 | 0 | 0 | 50 | 2.38e-02 |
| TCGA-QS-A5YQ-01A-11D-A31U-09 | 58 | 0 | 128 | 0 | 0 | 0 | 857 | 485 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 142 | 0 | 0 | 0 | 0 | 0 | 7.23e-01 |