This script downloads all of the TCGA data that we have used in this project. The data are obtained from the firebrowse.org repository. To run from the Bionimbus PDC, first run the following two commands from the command line to set up the HTTP proxy: `export http_proxy=http://cloud-proxy:3128` and `export https_proxy=http://cloud-proxy:3128`. Then set the root directory where the data will be stored. NB: this directory needs to be set / created based on your own system!

# Root directory under which all downloaded data are stored. The trailing "/" matters: sub-directory paths are built by pasting strings directly onto this value.
theRootDir <- "/mnt/data_scratch/finalData/"

Create the “dataIn/” directory if it doesn't already exist.

# Create "<root>/dataIn/" if it is not there yet. recursive = TRUE also creates
# a missing root directory; without it the call fails, and showWarnings = FALSE
# hides that failure until the first download breaks. showWarnings = FALSE is
# kept so that re-runs stay quiet when the directory already exists.
dir.create(paste0(theRootDir, "dataIn/"), showWarnings = FALSE, recursive = TRUE)

The data are organized on FireBrowse by cancer type. The disease abbreviations below are used to access the different folders containing the various data.

# Disease (cancer type) abbreviations, in the order the rest of the script
# relies on. Each one is used as a folder name and as part of the archive file
# name when building download URLs.
diseaseAbbrvs <- c(
  "ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "COADREAD", "DLBC",
  "ESCA", "FPPP", "GBM", "GBMLGG", "HNSC", "KICH", "KIPAN", "KIRC",
  "KIRP", "LAML", "LGG", "LIHC", "LUAD", "LUSC", "MESO", "OV",
  "PAAD", "PCPG", "PRAD", "READ", "SARC", "SKCM", "STAD", "STES",
  "TGCT", "THCA", "THYM", "UCEC", "UCS", "UVM"
)

Download all the TCGA RNA-seq data.

# There is no RNA-seq data for "FPPP" or "STAD". They are excluded by name
# rather than by hard-coded position, so that editing diseaseAbbrvs cannot
# silently drop the wrong cancer types.
noRnaSeqAbbrvs <- c("FPPP", "STAD")
missingAbrvsRnaSeq <- which(diseaseAbbrvs %in% noRnaSeqAbbrvs) # positions of the excluded entries, kept for any code that expects indices
# Logical subsetting is used instead of negative indices: x[-integer(0)] would
# return an empty vector if none of the names matched.
rnaSeqDiseaseAbbrvs <- diseaseAbbrvs[!(diseaseAbbrvs %in% noRnaSeqAbbrvs)]
rnaSeqFilesDir <- paste0(theRootDir, "dataIn/rnaSeq/")
dir.create(rnaSeqFilesDir, showWarnings = FALSE, recursive = TRUE) # make this directory (and missing parents) if it doesn't exist.

# Download one archive per cancer type from the stddata__2015_08_21 run. The
# archive name is built once and reused for both the URL and the local file.
for (i in seq_along(rnaSeqDiseaseAbbrvs)) {
  fname <- paste0(
    "gdac.broadinstitute.org_", rnaSeqDiseaseAbbrvs[i],
    ".Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz"
  )
  rnaSeqUrl <- paste0(
    "http://gdac.broadinstitute.org/runs/stddata__2015_08_21/data/",
    rnaSeqDiseaseAbbrvs[i], "/20150821/", fname
  )
  # The archives are binary, so request binary mode explicitly.
  download.file(rnaSeqUrl, paste0(rnaSeqFilesDir, fname), mode = "wb")
}

# Unzip the downloaded ".tar.gz" RNA-seq data! NB, this command has been tested in Linux. It may not work in Windows. If it does not work, please extract these files manually using software such as 7zip.
# Only entries ending in ".tar.gz" are selected: the archives are extracted into
# this same directory, so on a re-run a plain directory listing would also
# contain the extracted contents, which are not archives.
rnaSeqArchives <- list.files(rnaSeqFilesDir, pattern = "\\.tar\\.gz$")
# paste0() turns a zero-length listing into the bare directory path, so the
# empty case is handled explicitly.
thegzFiles <- if (length(rnaSeqArchives) > 0) paste0(rnaSeqFilesDir, rnaSeqArchives) else character(0)
# Returns one status code per archive, named by archive path (0 on success).
vapply(thegzFiles, untar, FUN.VALUE = integer(1), exdir = rnaSeqFilesDir)
##      /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_ACC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_BLCA.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_BRCA.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_CESC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_CHOL.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_COAD.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
## /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_COADREAD.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_DLBC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_ESCA.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##   /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_GBMLGG.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##      /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_GBM.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_HNSC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_KICH.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##    /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_KIPAN.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_KIRC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_KIRP.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_LAML.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##      /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_LGG.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_LIHC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_LUAD.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_LUSC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_MESO.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##       /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_OV.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_PAAD.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_PCPG.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_PRAD.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_READ.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_SARC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_SKCM.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_STES.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_TGCT.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_THCA.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_THYM.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##     /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_UCEC.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##      /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_UCS.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0 
##      /mnt/data_scratch/finalData/dataIn/rnaSeq/gdac.broadinstitute.org_UVM.Merge_rnaseqv2__illuminahiseq_rnaseqv2__unc_edu__Level_3__RSEM_genes__data.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                   0

Download the TCGA clinical data.

# Folder that receives the clinical files; it is created here unless it is
# already present (in which case the call is a silent no-op).
clinicalFilesDir <- paste0(theRootDir, "dataIn/clinical/")
dir.create(path = clinicalFilesDir, showWarnings = FALSE)

Note: the code to access the clinical data directly from FireBrowse has been commented out, because we were having issues accessing these data right before re-submitting this manuscript. We have therefore mirrored the clinical data on GitHub, and the code below downloads them from that mirror. Please use this code if you have problems accessing these data directly from FireBrowse.

# Lower-case abbreviations that appear in the clinical file names.
allTcgaClinAbrvs <- c(
  "acc", "blca", "brca", "cesc", "chol", "cntl", "coad", "dlbc", "esca", "fppp",
  "gbm", "hnsc", "kich", "kirc", "kirp", "laml", "lcml", "lgg", "lihc", "lnnh",
  "luad", "lusc", "meso", "misc", "ov", "paad", "pcpg", "prad", "read", "sarc",
  "skcm", "stad", "tgct", "thca", "thym", "ucec", "ucs", "uvm"
)

# Fetch one clinical patient file per abbreviation from the GitHub mirror. The
# remote file name and the local file name are identical.
for (i in seq_along(allTcgaClinAbrvs)) {
  fname <- paste0("nationwidechildrens.org_clinical_patient_", allTcgaClinAbrvs[i], ".txt")
  theUrl <- paste0("https://raw.github.com/paulgeeleher/tcgaData/master/", fname)
  download.file(url = theUrl, destfile = paste0(clinicalFilesDir, fname))
}

# Alternative download straight from tcga-data.nci.nih.gov (disabled; kept for reference):
# for(i in 1:length(allTcgaClinAbrvs))
# {
#   fname <- paste("nationwidechildrens.org_clinical_patient_", allTcgaClinAbrvs[i], ".txt", sep="")
#   theUrl <- paste("https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/", allTcgaClinAbrvs[i] ,"/bcr/biotab/clin/nationwidechildrens.org_clinical_patient_", allTcgaClinAbrvs[i], ".txt", sep="")
#   download.file(theUrl, paste(clinicalFilesDir, fname, sep=""))
# }

Download the level 3 CNV data. Note that these data have had the germline component removed, thus these are somatic CNVs.

# Cancer types for which the CNV segment archive is downloaded.
diseaseAbbrvsForCnvs <- c(
  "ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "COADREAD", "DLBC", "ESCA",
  "GBM", "GBMLGG", "HNSC", "KICH", "KIPAN", "KIRC", "KIRP", "LAML", "LGG",
  "LIHC", "LUAD", "LUSC", "MESO", "OV", "PAAD", "PCPG", "PRAD", "READ", "SARC",
  "SKCM", "STAD", "STES", "TGCT", "THCA", "THYM", "UCEC", "UCS", "UVM"
)
cnvFilesDir <- paste0(theRootDir, "dataIn/tcga_cnv_subtracted/")
dir.create(cnvFilesDir, showWarnings = FALSE, recursive = TRUE) # make this directory (and missing parents) if it doesn't exist.

# Download one "scna_minus_germline_cnv" segment archive per cancer type. The
# archive name is built once and reused for both the URL and the local file.
for (i in seq_along(diseaseAbbrvsForCnvs)) {
  cnvArchive <- paste0(
    "gdac.broadinstitute.org_", diseaseAbbrvsForCnvs[i],
    ".Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz"
  )
  cnvUrl <- paste0(
    "http://gdac.broadinstitute.org/runs/stddata__2015_08_21/data/",
    diseaseAbbrvsForCnvs[i], "/20150821/", cnvArchive
  )
  # The archives are binary, so request binary mode explicitly.
  download.file(cnvUrl, paste0(cnvFilesDir, cnvArchive), mode = "wb")
}

# Extract the archives in place. Only entries ending in ".tar.gz" are selected:
# extraction writes into this same directory, so on a re-run a plain directory
# listing would also contain the extracted contents, which are not archives.
cnvArchives <- list.files(cnvFilesDir, pattern = "\\.tar\\.gz$")
# paste0() turns a zero-length listing into the bare directory path, so the
# empty case is handled explicitly.
thegzFiles <- if (length(cnvArchives) > 0) paste0(cnvFilesDir, cnvArchives) else character(0)
# Returns one status code per archive, named by archive path (0 on success).
vapply(thegzFiles, untar, FUN.VALUE = integer(1), exdir = cnvFilesDir)
##      /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_ACC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_BLCA.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_BRCA.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_CESC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_CHOL.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_COAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
## /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_COADREAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_DLBC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_ESCA.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##   /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_GBMLGG.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_GBM.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_HNSC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_KICH.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##    /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_KIPAN.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_KIRC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_KIRP.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_LAML.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_LGG.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_LIHC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_LUAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_LUSC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_MESO.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##       /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_OV.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_PAAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_PCPG.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_PRAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_READ.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_SARC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_SKCM.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_STAD.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_STES.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_TGCT.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_THCA.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_THYM.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_UCEC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_UCS.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/tcga_cnv_subtracted/gdac.broadinstitute.org_UVM.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2015082100.0.0.tar.gz 
##                                                                                                                                                                                                                       0

Download the somatic mutation data.

# Download the somatic mutation calls ("Mutation_Packager_Calls", Level 3) for
# each cohort from the 2016-01-28 stddata run, then unpack every archive into
# the same directory. Compared with diseaseAbbrvs, this list omits "FPPP" and
# "MESO".
diseaseAbbrvsForMuts <- c("ACC", "BLCA", "BRCA", "CESC", "CHOL", "COAD", "COADREAD", "DLBC", "ESCA", "GBM", "GBMLGG", "HNSC", "KICH", "KIPAN", "KIRC", "KIRP", "LAML", "LGG", "LIHC", "LUAD", "LUSC", "OV", "PAAD", "PCPG", "PRAD", "READ", "SARC", "SKCM", "STAD", "STES", "TGCT", "THCA", "THYM", "UCEC", "UCS", "UVM")
mutFilesDir <- paste0(theRootDir, "dataIn/mutation_data/")
dir.create(mutFilesDir, showWarnings = FALSE) # make this directory if it doesn't exist.
for(i in seq_along(diseaseAbbrvsForMuts)) # seq_along() is a no-op on an empty vector, unlike 1:length().
{
  # The archive name is shared by the remote URL and the local destination.
  fname <- paste0("gdac.broadinstitute.org_", diseaseAbbrvsForMuts[i], ".Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz")
  mutationDataUrl <- paste0("http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/", diseaseAbbrvsForMuts[i], "/20160128/", fname)
  # mode = "wb": the archives are binary, so never let them be written in text mode.
  download.file(mutationDataUrl, paste0(mutFilesDir, fname), mode = "wb")
}
# Only unpack the .tar.gz archives. A plain dir() listing would also return the
# directories extracted by a previous run, and untar() cannot handle those.
mutArchiveNames <- list.files(mutFilesDir, pattern = "\\.tar\\.gz$")
# paste0() recycles a zero-length argument to "", so guard the empty case explicitly.
thegzFiles <- if (length(mutArchiveNames) > 0) paste0(mutFilesDir, mutArchiveNames) else character(0)
# untar() returns a status code per archive (0 on success); vapply() keeps the
# result a named numeric vector regardless of how many archives there are.
vapply(thegzFiles, untar, numeric(1), exdir = mutFilesDir)
##      /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_ACC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_BLCA.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_BRCA.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_CESC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_CHOL.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_COAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
## /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_COADREAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_DLBC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_ESCA.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##   /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_GBMLGG.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_GBM.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_HNSC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_KICH.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##    /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_KIPAN.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_KIRC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_KIRP.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_LAML.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_LGG.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_LIHC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_LUAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_LUSC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##       /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_OV.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_PAAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_PCPG.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_PRAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_READ.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_SARC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_SKCM.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_STAD.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_STES.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_TGCT.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_THCA.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_THYM.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##     /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_UCEC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_UCS.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0 
##      /mnt/data_scratch/finalData/dataIn/mutation_data/gdac.broadinstitute.org_UVM.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz 
##                                                                                                                                       0

This file contains additional clinical information, which we need in order to add drug information to one of the supplementary tables (reviewer request). It is used in “breast_cancer_analysis.R”.

# Fetch the BRCA "Merge_Clinical" Level 1 archive (2016-01-28 run) and unpack
# it inside the clinical data directory.
thisFname <- "gdac.broadinstitute.org_BRCA.Merge_Clinical.Level_1.2016012800.0.0.tar.gz"
thisUrl <- "http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/BRCA/20160128/gdac.broadinstitute.org_BRCA.Merge_Clinical.Level_1.2016012800.0.0.tar.gz"
# The same local path is the download destination and the archive to extract.
brcaClinicalArchive <- paste0(clinicalFilesDir, thisFname)
download.file(url = thisUrl, destfile = brcaClinicalArchive)
untar(tarfile = brcaClinicalArchive, exdir = clinicalFilesDir)

Download the “categorys.csv” file. This was defined in Geeleher et al., Genome Biology 2016. Here it is downloaded from GitHub.

# Fetch categorys.csv from the tcgaData GitHub repository into dataIn/.
theUrl <- "https://raw.github.com/paulgeeleher/tcgaData/master/categorys.csv"
download.file(url = theUrl, destfile = paste0(theRootDir, "dataIn/categorys.csv"))

Download the cell_lines_copy_number.csv, which is the CGP CNV file. Here it is downloaded from GitHub (originally obtained from CGP/GDSC website: www.cancerrxgene.org).

# Fetch the CGP copy-number table from the tcgaData GitHub repository into dataIn/.
theUrl <- "https://raw.github.com/paulgeeleher/tcgaData/master/cgp_cnv_data/cell_lines_copy_number.csv"
download.file(url = theUrl, destfile = paste0(theRootDir, "dataIn/cell_lines_copy_number.csv"))

Print the session information to aid reproducibility.

# Record the R version, platform, locale and loaded packages used for this run.
print(utils::sessionInfo())
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.3 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.12.3
## 
## loaded via a namespace (and not attached):
## [1] magrittr_1.5   tools_3.2.2    stringi_1.0-1  stringr_1.0.0 
## [5] evaluate_0.8.3