Enrichment analysis of tissue-specific genes in MHBs

In [23]:
# rGREAT: region-based enrichment test (great(), getEnrichmentTable()).
library(rGREAT)
# Human gene annotation database, queried below with mapIds() to convert
# gene symbols to Entrez IDs.
library("org.Hs.eg.db")
# pheatmap() draws the rank heatmap at the end of the notebook.
library(pheatmap)
# A negative `warn` value makes R ignore all warnings for the rest of the
# session. NOTE(review): this also hides warnings from file reading and ID
# mapping -- consider removing it while debugging.
options(warn=-1)

An example using colon MHBs

In [6]:
# Read the colon MHB intervals and label the columns chr / start / end.
gr <- read.table("colon_MHB.bed")
names(gr) <- c("chr", "start", "end")
# Coerce the data frame into a GRanges object.
gr <- GRanges(gr)
# Shift every start one base to the right (presumably converting 0-based BED
# starts to 1-based coordinates -- confirm against the origin of the file).
start(gr) <- start(gr) + 1
head(gr)
GRanges object with 6 ranges and 0 metadata columns:
      seqnames        ranges strand
         <Rle>     <IRanges>  <Rle>
  [1]     chr1 540839-540886      *
  [2]     chr1 564471-564534      *
  [3]     chr1 566287-566317      *
  [4]     chr1 567312-567438      *
  [5]     chr1 567681-567786      *
  [6]     chr1 568529-568570      *
  -------
  seqinfo: 24 sequences from an unspecified genome; no seqlengths
In [7]:
# Load the tissue-specific gene sets and list the tissue names they cover
# (ls() prints the names in sorted order).
TS <- readRDS("tissueSpecificGenes.RDS")
ls(TS)
  1. 'Adipose Tissue'
  2. 'Adrenal Gland'
  3. 'Appendix'
  4. 'Bone Marrow'
  5. 'Breast'
  6. 'Cerebral Cortex'
  7. 'Cervix, uterine'
  8. 'Colon'
  9. 'Duodenum'
  10. 'Endometrium'
  11. 'Esophagus'
  12. 'Fallopian Tube'
  13. 'Gallbladder'
  14. 'Heart Muscle'
  15. 'Kidney'
  16. 'Liver'
  17. 'Lung'
  18. 'Lymph Node'
  19. 'Ovary'
  20. 'Pancreas'
  21. 'Placenta'
  22. 'Prostate'
  23. 'Rectum'
  24. 'Salivary Gland'
  25. 'Seminal Vesicle'
  26. 'Skeletal Muscle'
  27. 'Skin'
  28. 'Small Intestine'
  29. 'Smooth Muscle'
  30. 'Spleen'
  31. 'Stomach'
  32. 'Testis'
  33. 'Thyroid Gland'
  34. 'Tonsil'
  35. 'Urinary Bladder'
In [9]:
# Colon-specific genes: the first column of the "Colon" entry, as character.
# The entry is selected by name instead of by position (previously 8), because
# the position was read off ls(TS), whose output is sorted and therefore not
# guaranteed to match the order in which the list stores its elements.
example <- as.character(TS[["Colon"]][, 1])
head(example)
  1. 'PIGY'
  2. 'NOS2'
  3. 'CEACAM7'
  4. 'NOX1'
  5. 'CLCA1'
  6. 'CLCA4'
In [16]:
# Translate the gene symbols to Entrez IDs, then drop duplicates and symbols
# that could not be mapped (NA).
SR <- mapIds(
  org.Hs.eg.db,
  keys = example,
  column = "ENTREZID",
  keytype = "SYMBOL"
)
SR <- unique(SR)
SR <- SR[!is.na(SR)]
# Gene sets are passed to great() as a named list; here a single set "example".
gs <- list(example = SR)

# Run the enrichment test of the gene set against the MHB regions using the
# "hg19" annotation, and pull out the result table.
res <- great(gr, gs, "hg19", verbose = FALSE)
tb <- getEnrichmentTable(res)
'select()' returned 1:1 mapping between keys and columns

* extended_tss is already cached, directly use it.

In [17]:
# Show the enrichment table: colon-specific genes are enriched in colon MHBs
# with high significance (see the p_adjust column).
tb
A data.frame: 1 × 12
idgenome_fractionobserved_region_hitsfold_enrichmentp_valuep_adjustmean_tss_distobserved_gene_hitsgene_set_sizefold_enrichment_hyperp_value_hyperp_adjust_hyper
<chr><dbl><int><dbl><dbl><dbl><dbl><int><int><dbl><dbl><dbl>
1example0.0099769022081.4385983.721069e-073.721069e-0748792701431.1094970.13940020.1394002
In [18]:
# Keep the gene-set id and its adjusted p-value, and use each row's name as
# the rank label.
test <- tb[, c("id", "p_adjust")]
test$rnkPV <- rownames(test)
# Logical mask of non-significant rows (adjusted p-value above 0.05); their
# rank label is blanked out with NA.
log <- test$p_adjust > 0.05
test$rnkPV[log] <- NA
test
A data.frame: 1 × 3
idp_adjustrnkPV
<chr><dbl><chr>
1example3.721069e-071

Show enrichment by ranks of significance

In [54]:
# Load the tab-separated table "Exp.txt": the first line is the header and the
# first column supplies the row names.
result <- read.table("Exp.txt", header = TRUE, sep = "\t", row.names = 1)
In [55]:
# Preview the first rows of the loaded table.
head(result)
A data.frame: 6 × 17
AdiposeAdrenalB.cellBreastcolonesophagusheartliverlungovarypancreasplacentaspleenstomachT.cellthymusthyroid
<int><int><lgl><int><int><int><int><int><int><int><int><int><int><int><lgl><int><int>
Adipose Tissue 1NANA 5NANA 2NANANANA 3NANANANANA
Adrenal GlandNA 1NANANANANANANANANANANANANA 2NA
BreastNANANA 1NANANANANANANANANANANANANA
ColonNANANANA 1NANANANANA 2NANANANANANA
EsophagusNANANA 3NA 1NANANANANA 2NA 2NANANA
Heart MuscleNANANANANANA 1NANANANANANANANANANA
In [56]:
# Mask the non-significant entries: every NA in the table is replaced by the
# placeholder value 14, leaving `result` itself untouched.
result_input <- replace(result, is.na(result), 14)
head(result_input)
A data.frame: 6 × 17
AdiposeAdrenalB.cellBreastcolonesophagusheartliverlungovarypancreasplacentaspleenstomachT.cellthymusthyroid
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
Adipose Tissue 11414 51414 214141414 31414141414
Adrenal Gland14 114141414141414141414141414 214
Breast141414 114141414141414141414141414
Colon14141414 11414141414 2141414141414
Esophagus141414 314 11414141414 214 2141414
Heart Muscle141414141414 114141414141414141414

Heatmap

In [57]:
# Size and resolution of the inline plot in the notebook.
options(repr.plot.width = 8, repr.plot.height = 6, repr.plot.res = 150)

# Heatmap of the masked table, drawn as-is (no clustering, no scaling).
# Fix: the original passed `how_colnames`, a misspelling of `show_colnames`
# that pheatmap does not define; the intended argument is now spelled out.
pheatmap(
  result_input,
  main = "Tissue-specific gene",
  show_rownames = TRUE,
  show_colnames = TRUE,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "none",
  angle_col = 45,
  fontsize_row = 11,
  fontsize_col = 12,
  cellwidth = 21,
  cellheight = 18,
  border_color = "grey90",
  # Only the two ends of the value range (1 and the mask value 14) are
  # labelled on the legend.
  legend_breaks = c(1, 14),
  legend_labels = c("1", "14"),
  # Cells whose value is not greater than 1 are marked with "*"; all other
  # cells show no text.
  display_numbers = matrix(
    ifelse(result_input > 1, "", "*"),
    nrow = nrow(result_input)
  ),
  # 50-step gradient from red (low values) through yellow to white (high).
  color = colorRampPalette(
    colors = c("#F71E35", "#FCD271", "#fcebb6", "white")
  )(50)
)