#REMOVE ME: mutDens_2cht.Rmd
#REMOVE ME: 8/3/2022
---
output:
  html_document:
    toc: true
    toc_depth: 1
    number_sections: true
---
```{r setup, include=FALSE}
# Report-wide chunk defaults, dependencies, and project helper functions.
knitr::opts_chunk$set(echo = TRUE)
library(knitr)
library(GenomicRanges)
source('mutDens.R')

# The option file must pre-exist; it is a tab-delimited table with (at least)
# the columns `Key` and `Value`.
optFile <- 'optFiles/options_2cht.txt'
if (!file.exists(optFile)) {
  stop('Option file not found: ', optFile, call. = FALSE)
}
df <- read.delim(optFile, header = TRUE, stringsAsFactors = FALSE)
if (!all(c('Key', 'Value') %in% names(df))) {
  stop('Option file must contain the columns "Key" and "Value": ', optFile,
       call. = FALSE)
}

# One list element per key. The list is attached so that later chunks can
# refer to option values by bare name (mutF1, gn, span, ...); it is detached
# again at the end of the last chunk. Later chunks also read `options$sbs`,
# so the variable name `options` must be kept.
options <- split(df$Value, df$Key)
attach(options)

# calcSrc and triCont arrive as text and are turned into R values by
# evaluating `name=c(<text>)` at top level, which creates variables that mask
# the attached character versions.
# NOTE(review): eval(parse()) executes whatever the option file contains;
# only use option files from a trusted source.
calcSrc.cmd <- paste0('calcSrc=c(', calcSrc, ')')
triCont.cmd <- paste0('triCont=c(', triCont, ')')
eval(parse(text = calcSrc.cmd))
eval(parse(text = triCont.cmd))

# Cohort labels: file name without directory, truncated at the first '.'.
cht1 <- unlist(strsplit(gsub('.*/', '', mutF1), '.', fixed = TRUE))[1]
cht2 <- unlist(strsplit(gsub('.*/', '', mutF2), '.', fixed = TRUE))[1]

# Output directory for the result tables, named after the two cohorts.
proj <- paste(cht1, cht2, sep = '_VS_')
if (!dir.exists(proj)) {
  dir.create(proj)
}
```
---
title: "`r proj`: Mutational Density Spatial Trend"
date: "`r format(Sys.time())`"
---
<style type="text/css">
.main-container {
  max-width: 90%;
  margin-left: auto;
  margin-right: auto;
}
</style>
---

# Mutational class distribution (Cohort 1 & Cohort 2)
```{r classDistrib,echo=F, comment=F, warning=F, dependson=F, message=F,results='asis',fig.show='hold',out.width='25%'}
# Whole-genome and focal-region mutational class distributions for both
# cohorts, followed by one pie/symmetry figure per cohort.
gnObj <- gnStr2Obj(gn)

# Column layout is decided per input file: for files whose name ends in
# 'vcf', REF/ALT are read from columns 4/5; otherwise from columns 3/4.
# (Previously the layout of cohort 2 was derived from cohort 1's file name.)
refCol1 <- if (grepl('vcf$', mutF1)) 4 else 3
refCol2 <- if (grepl('vcf$', mutF2)) 4 else 3
wgRes1 <- classDistrib_wg(mutF1, gnObj, triCont = triCont,
                          chrCol = 1, posCol = 2,
                          refCol = refCol1, altCol = refCol1 + 1)
wgRes2 <- classDistrib_wg(mutF2, gnObj, triCont = triCont,
                          chrCol = 1, posCol = 2,
                          refCol = refCol2, altCol = refCol2 + 1)

# Class distribution restricted to the regions around the input positions.
frRes1 <- classDistrib_fr(wgRes1$mutS, pointsF, span = as.numeric(span))
frRes2 <- classDistrib_fr(wgRes2$mutS, pointsF, span = as.numeric(span))

# figProp is consumed as `out.width` by the header of the next chunk, so it
# must be defined here. With tri-nucleotide context the focal results are
# flattened with flatten6() before plotting.
if (triCont) {
  figProp <- '6%'
  cnt6.fr1 <- flatten6(frRes1)
  cnt6.fr2 <- flatten6(frRes2)
} else {
  figProp <- '30%'
  cnt6.fr1 <- frRes1
  cnt6.fr2 <- frRes2
}
plot_pieANDsym(cnt6.fr1, title = paste(cht1, '(focal)'))
plot_pieANDsym(cnt6.fr2, title = paste(cht2, '(focal)'))
```

# Mutational density curves centered on input positions
```{r shape,echo=F, comment=F, warning=F, dependson=F, message=F,results='asis',fig.show='hold',out.width=figProp}
# For every selected mutational class: compute the density curves of both
# cohorts around the input positions, detect peaks/dips per cohort, compare
# the two cohorts statistically, write the result tables into `proj`, and
# draw one panel per class.
points0 <- read.delim(pointsF, as.is = TRUE, header = TRUE)

# Option values are stored as text; convert the loop-invariant ones once.
bszNum <- as.numeric(bsz)
spanNum <- as.numeric(span)
Gbg <- getBinsOut(points0, bszNum, as.numeric(outerspan), as.numeric(inspan))

# 'SBS6' selects all six classes; otherwise `sbs` is a comma-separated list.
if (options$sbs == 'SBS6') {
  SBSs <- c('C2A', 'C2G', 'C2T', 'T2A', 'T2C', 'T2G')
} else {
  SBSs <- gsub(' ', '', unlist(strsplit(options$sbs, ',')), fixed = TRUE)
}

# Keep only the classes whose name starts with a selected 3-character code.
mutS1 <- wgRes1$mutS
mutS1 <- mutS1[substr(names(mutS1), 1, 3) %in% SBSs]
mutS2 <- wgRes2$mutS
mutS2 <- mutS2[substr(names(mutS2), 1, 3) %in% SBSs]
nsbs <- length(unique(substr(names(mutS1), 1, 3)))

# Result containers, one slot per class of cohort 1; slots of classes that
# are skipped below stay NULL.
resCmprS <- resCmprS_L2 <- resCmprS_L1 <- resShapeS_C1 <- resShapeS_C2 <-
  vector('list', length(mutS1))
names(resShapeS_C1) <- names(resShapeS_C2) <- names(resCmprS_L1) <-
  names(resCmprS_L2) <- names(resCmprS) <- names(mutS1)

for (trisbs in names(mutS1)) {
  sbs <- substr(trisbs, 1, 3)
  mut1 <- mutS1[[trisbs]]
  mut2 <- mutS2[[trisbs]]

  # Both cohorts must have mutations of this class. A class that is absent
  # from cohort 2 yields NULL here and is treated like an empty table.
  hasBoth <- !is.null(mut1) && !is.null(mut2) &&
    nrow(mut1) > 0 && nrow(mut2) > 0

  if (hasBoth) {
    # Per-strand mutation counts, ordered '+' then '-'.
    totMut1 <- as.vector(table(mut1$strand)[c('+', '-')])
    totMut2 <- as.vector(table(mut2$strand)[c('+', '-')])

    ll.res1 <- paramLocal(mut1, Gbg, shapeModel)
    ll.res2 <- paramLocal(mut2, Gbg, shapeModel)
    resX1 <- mutDensX(mut1, points0, sbs, bsz = bszNum, span = spanNum,
                      gFt = gFt, gn = gn, calcSrc = calcSrc)
    resX2 <- mutDensX(mut2, points0, sbs, bsz = bszNum, span = spanNum,
                      gFt = gFt, gn = gn, calcSrc = calcSrc)

    # Peak/dip detection per cohort; columns are prefixed with the class
    # code, rows with the cohort.
    resShapeC1 <- mutDensShape(resX1$mutCnt, ll.res1$lamda2, ll.res1$nbparam2,
                               shapeModel = shapeModel, pTh = 1e-5)
    resShapeC2 <- mutDensShape(resX2$mutCnt, ll.res2$lamda2, ll.res2$nbparam2,
                               shapeModel = shapeModel, pTh = 1e-5)
    colnames(resShapeC1) <- colnames(resShapeC2) <-
      paste(sbs, colnames(resShapeC1), sep = '.')
    rownames(resShapeC1) <- paste('Cohort1', rownames(resShapeC1), sep = '.')
    rownames(resShapeC2) <- paste('Cohort2', rownames(resShapeC2), sep = '.')
    resShapeS_C1[[trisbs]] <- resShapeC1
    resShapeS_C2[[trisbs]] <- resShapeC2

    # Cohort 1 vs cohort 2: first curve (mutation on '+'), second curve,
    # and both curves summed.
    resCmprL1 <- mutDensCmpr(rbind(resX1$mutDens[1, ], resX2$mutDens[1, ]),
                             c(totMut1[1], totMut2[1]))
    resCmprL2 <- mutDensCmpr(rbind(resX1$mutDens[2, ], resX2$mutDens[2, ]),
                             c(totMut1[2], totMut2[2]))
    resCmpr <- mutDensCmpr(rbind(colSums(resX1$mutDens),
                                 colSums(resX2$mutDens)),
                           c(sum(totMut1), sum(totMut2)))
    colnames(resCmprL1) <- colnames(resCmprL2) <- colnames(resCmpr) <-
      paste(trisbs, colnames(resCmpr), sep = '.')
    rownames(resCmprL1) <- paste('L1', rownames(resCmprL1), sep = '.')
    rownames(resCmprL2) <- paste('L2', rownames(resCmprL2), sep = '.')

    # The report shows values rounded to 3 significant digits; the files
    # written below keep full precision.
    resCmprS[[trisbs]] <- signif(resCmpr, 3)
    resCmprS_L1[[trisbs]] <- signif(resCmprL1, 3)
    resCmprS_L2[[trisbs]] <- signif(resCmprL2, 3)
    write.table(resCmprL1, file.path(proj, paste0(trisbs, '.resCmpr_L1.txt')))
    write.table(resCmprL2, file.path(proj, paste0(trisbs, '.resCmpr_L2.txt')))
    write.table(resCmpr, file.path(proj, paste0(trisbs, '.resCmpr.txt')))
    write.table(resShapeC1, file.path(proj, paste0(trisbs, '.resShape_C1.txt')))
    write.table(resShapeC2, file.path(proj, paste0(trisbs, '.resShape_C2.txt')))

    lnames <- paste(rep(c(cht1, cht2), each = 2), rep(sbs2mut2(sbs), 2))
    plot4mutDens(resX1$mutDens, resX2$mutDens, totMut1, totMut2,
                 bsz = bszNum, title = gsub('2', '>', trisbs),
                 lnames = lnames, legend = TRUE)
  } else {
    # Emit an empty panel so that every class still produces one figure.
    plot.new()
  }
}
```

# Peaks/Dips detected from Mutational density curves
<font size="4">
*Two super-tables are shown below, one for each cohort. Each super-table may include one or multiple tables, corresponding to the mutational class(es) designated in the option file. The column headers have a prefix indicating the specific mutational class (e.g., C2A, which means C>A).*

Each table has four rows and six columns. The first three rows tell whether a central peak (Peak), a left peak (PeakL), or a right peak (PeakR) is detected. The fourth row tells whether a central dip (Dip) is detected. The six values in a row can be divided into two 3-tuples, where each tuple comprises three values: a p-value (from a Poisson or Negative Binomial distribution), the left bin number, and the right bin number. One 3-tuple is needed for each of the two density curves. The two curves are denoted with the prefixes L1 and L2, rendered by default in orange and blue, and they correspond to the nominal mutation form (e.g., C>A) and its reverse complementary form (e.g., G>T), respectively.
</font>
```{r showTable1,echo=F, comment=F, warning=F, dependson=F, message=F,results='asis'}
# Peak/dip tables: cohort 1 first, then cohort 2 (one table per class).
knitr::kable(x = resShapeS_C1, format = 'markdown')
knitr::kable(x = resShapeS_C2, format = 'markdown')
```

# Statistical tests of mutation densities (Wilcoxon and trend-test)
<font size="4">
*The tables below show the results of statistical comparisons of mutational density values (MPKM) between the two cohorts. The two mutually complementary nominal forms, e.g., C>A and G>T, are treated separately, along with their combined superset. Hence, three super-tables are shown below. The first two super-tables are identified by the prefixes L1 and L2 on their row names, designating the first and second forms (or curves) in the pair of the mutational class.  
Each super-table may include one or multiple tables corresponding to the mutational class(es) designated in the option file. The column headers have a prefix indicating the specific mutational class (e.g., C2A).*

Each table has four rows and three columns. The first row shows the mean difference of the density values in the two curves, the second row shows the paired Wilcoxon rank test p-values between the density values in the two curves, and the third and fourth rows show the p-values from the WAVK trend test (R package funtimes), which detects any non-random spatial trend. The three columns refer to different ranges of the curves, namely whole (whole range), left (left half range), and right (right half range).

</font>
```{r showTable2,echo=F, comment=F, warning=F, dependson=F, message=F,results='asis'}
# Cohort comparison tables: first curve (L1), second curve (L2), then both
# curves combined.
knitr::kable(x = resCmprS_L1, format = 'markdown')
knitr::kable(x = resCmprS_L2, format = 'markdown')
knitr::kable(x = resCmprS, format = 'markdown')
# Remove the option list that the setup chunk attached to the search path.
detach('options', character.only = TRUE)
```
