library(compEpiTools)
source(file.path('primary_data_analysis','00_library_of_functions.R'))
load(file.path('data','allsysRNA.RData'))
load(file.path('data','allMycChIP.RData'))

# Identifiers of the experimental systems; used below as keys into
# allsysRNA and allMycChIP.
tags <- c('MycER','Waltz2014','p493','EuMyc','TetMyc')
# Plot titles, parallel to `tags` (same order).
mains <- c('3T9MycER','U2OSTet-Myc','p493-6','Eu-myc','LiverTet-Myc')

# For every system, join the RNA table with the TSS rows of the Myc ChIP
# table whose `bound` field is non-empty, matching on EntrezID. Columns
# present in both tables get the suffixes _RNA / _ChIP, and rows containing
# any NA are dropped. The result is a list named by `tags`.
mycShareData <- setNames(lapply(tags, function(tag) {
	print(tag)
	tss <- allMycChIP[[tag]]$tss
	# subset() (rather than `[`) so that NA conditions count as FALSE
	boundTss <- subset(tss, nchar(tss$bound) > 0)
	merged <- merge(allsysRNA[[tag]], boundTss, by='EntrezID', suffixes = c('_RNA','_ChIP'))
	na.omit(merged)
}), tags)

#################
## panel A ########
#################

print('panel A')

# Summarise the trend of y against x in rank-based bins.
#
# Observations are assigned to bins by the rank of x, using a target bin
# size of ceiling(length(x) / n); `center` is then applied to the x values
# and to the y values of each bin.
#
# Arguments:
#   x, y   - vectors of equal length (x drives the binning)
#   n      - requested number of bins
#   center - summary function applied within each bin (default: median)
# Returns a data.frame with one row per bin and columns `x` and `y`.
binTheTrend <- function(x, y, n, center=median) {
	perBin <- ceiling(length(x) / n)
	binId <- ceiling(rank(x) / perBin)
	perBinCenter <- function(values) tapply(values, binId, center)
	data.frame(x=perBinCenter(x), y=perBinCenter(y))
}

# Panel A: one row per system, two plots per row. Genes are split by the
# sign of the RNA log2 ratio; within each half the ChIP and RNA log2 ratios
# are summarised in 25 rank bins (binTheTrend), plotted, annotated with the
# Spearman correlation of the binned points and overlaid with a linear fit.
pdf(file.path('figures','S1A.pdf'), height=3*length(tags), width=6)
par(mfrow=c(length(tags),2))
# NOTE: `i` is a top-level variable and stays defined after the loop ends.
for(i in seq_along(tags)) {

	shared <- mycShareData[[tags[i]]]
	chipRatio <- shared$log2ratio_ChIP
	rnaRatio <- shared$log2ratio_RNA

	# genes with decreasing RNA
	isDown <- rnaRatio < 0
	downBins <- binTheTrend(chipRatio[isDown], rnaRatio[isDown], 25)
	downRho <- round(cor(downBins$x, downBins$y, method='s'), 2)
	downP <- signif(cor.test(downBins$x, downBins$y, method='s')$p.value, 2)
	plot(downBins$x, downBins$y, pch=20, xlab='myc Log2 ratio', ylab='rna Log2 ratio',
		main=paste0(mains[i]," DOWN\n( spearman's correlation=",downRho,", p=",downP," )"))
	abline(lm(y~x, data=downBins), lty=1, lwd=2, col='blue')

	# genes with increasing RNA
	isUp <- rnaRatio > 0
	upBins <- binTheTrend(chipRatio[isUp], rnaRatio[isUp], 25)
	upRho <- round(cor(upBins$x, upBins$y, method='s'), 2)
	upP <- signif(cor.test(upBins$x, upBins$y, method='s')$p.value, 2)
	plot(upBins$x, upBins$y, pch=20, xlab='myc Log2 ratio', ylab='rna Log2 ratio',
		main=paste0(mains[i]," UP\n( spearman's correlation=",upRho,", p=",upP," )"))
	abline(lm(y~x, data=upBins), lty=1, lwd=2, col='red')

}

dev.off()

#############
## panel B #####
##############

print('panel B')

# Expected to define TetmycDepEIDs and TetmycIndEIDs, which are used below.
load(file=file.path('tables','TetmycDepIndepEIDs.RData'))

share_data <- mycShareData[['TetMyc']]
# Only the first zeroshare value is used, as the position of the vertical
# reference line.
shareline <- share_data$zeroshare[1]
# Title is looked up from the tag itself, so it does not depend on whatever
# value a previous loop happened to leave in an index variable.
tetMycMain <- mains[match('TetMyc', tags)]

# Draw one ChIP-vs-RNA panel for the genes listed in `geneIDs`.
#   geneIDs   - EntrezIDs selecting the rows of `data` to plot
#   data      - merged table with EntrezID, log2ratio_ChIP, log2ratio_RNA
#   shareline - x position of the dashed vertical reference line
#   main      - panel title
# The panel shows the smoothed scatter clipped to the 0.5%-99.5% quantiles,
# loess trends for induced (red), repressed (blue) and all (gold) genes, and
# the corresponding Spearman correlations in the legend.
plotChipVsRna <- function(geneIDs, data, shareline, main) {
	sel <- subset(data, data$EntrezID %in% geneIDs)
	mycFC <- sel$log2ratio_ChIP
	rnaFC <- sel$log2ratio_RNA
	up <- rnaFC > 0
	down <- rnaFC < 0

	smoothXYlims(mycFC, rnaFC, xlab='myc Log2 ratio', ylab='rna Log2 ratio',
		xlim=quantile(mycFC, c(.005,.995)), ylim=quantile(rnaFC, c(.005,.995)), main=main)

	plotLoess( mycFC[up], rnaFC[up] , col='red', lwd=3)
	plotLoess( mycFC[down], rnaFC[down] , col='blue', lwd=3)
	plotLoess( mycFC, rnaFC , col='darkgoldenrod2', lwd=3)

	abline(h=0, lty=2)
	abline(v=shareline, lty=2)

	legend('topleft', legend = c(
		paste('induced =',round(cor(mycFC[up], rnaFC[up],method='s'),2)),
		paste('repressed =',round(cor(mycFC[down], rnaFC[down],method='s'),2)),
		paste('all =',round(cor(mycFC, rnaFC, method='s'),2))
		), cex=1.1, bty='n', text.col = c('red','blue','darkgoldenrod2'))
}

pdf(file.path('figures','S1B.pdf'), height=5, width=10)
par(mfrow=c(1,2))

# myc dependent
plotChipVsRna(TetmycDepEIDs, share_data, shareline, tetMycMain)

# myc independent
plotChipVsRna(TetmycIndEIDs, share_data, shareline, tetMycMain)

dev.off()

############### 
## panel C ########
##############

print('panel C')

library(pROC)

# Panel C: ROC curves for predicting "RNA log2 ratio > 0" from the ChIP
# log2 ratio, drawn separately for the two TetMyc gene sets. On each curve
# a dot marks the first threshold that exceeds `shareline`.
pdf(file.path('figures','S1C.pdf'), height=5, width=5)

geneSets <- list(TetmycDepEIDs, TetmycIndEIDs)
curveCols <- c('red','green')
AUCs <- numeric(2)

for (k in seq_along(geneSets)) {
	sel <- subset(share_data, share_data$EntrezID %in% geneSets[[k]])
	rocCurve <- roc(sel$log2ratio_RNA > 0, sel$log2ratio_ChIP)
	AUCs[k] <- as.numeric(rocCurve$auc)

	# the first curve opens the plot, the second one is added to it
	if (k == 1) {
		plot(rocCurve, col=curveCols[k])
	} else {
		lines(rocCurve, col=curveCols[k])
	}

	# which.max on a logical vector gives the index of the first TRUE
	idx <- which.max(rocCurve$thresholds > shareline)
	points(rocCurve$specificities[idx], rocCurve$sensitivities[idx], pch=20, cex=1.5, col=curveCols[k])
}

legend('bottomright', legend=paste(c('Myc dependent genes\n','Myc independent genes\n'), '( AUC =', round(AUCs,2), ')'), 
	col=curveCols, lty=1, bty='n')

dev.off()

###############
## panel D #######
##############

print('panel D')

# Panel D: for each system, plot the `low` signal of Myc peaks against their
# log2ratio, restricted to peaks whose row name is listed in the `bound`
# field of some TSS. Axes are clipped to the 0.1%-99.9% quantiles.
pdf(file.path('figures','S1D.pdf'), height=9, width=6)
par(mfrow=c(3,2))
for( i in seq_along(tags) )
{
	mycChIP <- allMycChIP[[tags[i]]]
	# `bound` holds comma-separated peak names; flatten them over all TSSs
	tssPeakNames <- unlist(strsplit(mycChIP$tss$bound, ','))
	allPeaks <- mycChIP$peaks
	tssPeaks <- allPeaks[rownames(allPeaks) %in% tssPeakNames,]

	ratioRange <- quantile(tssPeaks$log2ratio, c(0.001,.999))
	lowRange <- quantile(tssPeaks$low, c(0.001,.999))
	smoothXYlims(tssPeaks$log2ratio, tssPeaks$low, 
		xlim = ratioRange,
		ylim = lowRange,
		xlab='log2( High-Myc / Low-Myc )', ylab='log2 Low Myc', main=mains[i])
	plotLoess(tssPeaks$log2ratio, tssPeaks$low, qt=c(0.001,.999), col='magenta',lwd=2)
	# dotted line at zero, solid line at the first zeroshare value
	abline(v=0, lty=3)
	abline(v=tssPeaks$zeroshare[1])
}
dev.off()

# ###################
# ## panel E-F #######
# #################

print('panel E-F')

load(file.path('data','allMiz1ChIP.RData'))

# Draw two panels relating Myc and Miz1 ChIP signal to RNA changes.
#
# Panel 1: scatter of Miz1 `low` signal (x) against Myc log2ratio minus
#   zeroshare (y) for the differentially expressed genes (q-value <= 0.1)
#   that are bound in at least one of the two ChIP tables; points are
#   coloured by RNA log2 ratio. The fit log2ratio_rna ~ miz1 + log2ratio_myc
#   is passed to abline2D() and the Spearman correlation between observed and
#   fitted RNA log2 ratios is shown in the legend.
# Panel 2: ROC curves (and AUCs in the legend) for predicting
#   log2ratio_rna > 0 from four predictors: Miz1 alone, Myc delta-share
#   alone, and the fitted values of two linear models.
#
# Arguments:
#   rnaseq  - data frame with an EntrezID column.
#             NOTE(review): columns are renamed BY POSITION to EntrezID,
#             log2ratio_rna, qvalue_rna, so exactly three columns in that
#             order are assumed - confirm against the input tables.
#   mycChip - data frame with columns EntrezID, bound, low, log2ratio,
#             zeroshare.
#   mizChip - data frame with columns EntrezID, bound, low.
#
# Uses vec2cols() and abline2D(), which are not defined in this file, and
# roc()/auc() (pROC is attached earlier in this script).
# Returns (invisibly) whatever the final legend() call returns.
MycMiz1Plot <- function( rnaseq , mycChip , mizChip )
{

	# one RNA row per gene, limited to genes present in either ChIP table
	chipID <- union(mycChip$EntrezID, mizChip$EntrezID)
	rnaseq <- subset(rnaseq, rnaseq$EntrezID %in% chipID)
	rnaseq <- subset(rnaseq, !duplicated(rnaseq$EntrezID))

	colnames(rnaseq) <- c('EntrezID','log2ratio_rna','qvalue_rna')

	# point colours: blue-white-red over RNA log2 ratio breaks from -1 to 1
	rnaseq$log2ratio_rnaCols <- vec2cols(
		rnaseq$log2ratio_rna, 
		cols=colorRampPalette(c('blue','white','red'))(20), 
		breaks=seq(-1,1,by=.1))

	diffexpressed <- rnaseq$EntrezID[which(rnaseq$qvalue_rna<=.1)]
	# genes bound in one table that are also present in the other table
	mycBound <- intersect( mycChip$EntrezID[nchar(mycChip$bound)>0] , mizChip$EntrezID )
	mizBound <- intersect( mizChip$EntrezID[nchar(mizChip$bound)>0] , mycChip$EntrezID )

	geneSet <- intersect(diffexpressed, union(mycBound, mizBound))

	print(paste('considering', length(geneSet), 'genes bound out of the', 
		length(diffexpressed), 'differentially  expressed'))

	# Build the ChIP predictors by name rather than renaming by position:
	#   log2ratio_myc     = Myc log2ratio - zeroshare
	#   miz1              = Miz1 low signal
	#   log2ratio_mycmiz1 = Myc low signal - Miz1 low signal
	mycSide <- data.frame(
		EntrezID = mycChip$EntrezID,
		myc_low = mycChip$low,
		log2ratio_myc = mycChip$log2ratio - mycChip$zeroshare,
		stringsAsFactors = FALSE
		)
	mizSide <- data.frame(
		EntrezID = mizChip$EntrezID,
		miz1 = mizChip$low,
		stringsAsFactors = FALSE
		)
	chipData <- merge(mycSide, mizSide, by = 'EntrezID')
	chipData$log2ratio_mycmiz1 <- chipData$myc_low - chipData$miz1
	chipData$myc_low <- NULL

	allData <- merge(rnaseq, chipData, by='EntrezID')
	filteredData <- subset(allData, allData$EntrezID %in% geneSet)

	with( filteredData, {

		plot( miz1, log2ratio_myc, 
			xlab='Miz1', ylab='Myc LogRatio',
			pch=20, cex=.5, col=log2ratio_rnaCols
			)

		# fit first, then draw: the model must exist regardless of whether
		# abline2D() evaluates its argument
		lm2 <- lm( log2ratio_rna ~ miz1 + log2ratio_myc )
		abline2D ( lm2 )
		legend('topright', bty='n', 
			legend=paste0('cor=',round(cor( log2ratio_rna, lm2$fitted.values , method='s'),2)))

		# second model (Myc/Miz1 ratio + Myc delta-share); it is not plotted
		# as a scatter, only used as a ROC predictor below
		lm4 <- lm( log2ratio_rna ~ log2ratio_mycmiz1 + log2ratio_myc )

		predictors <- cbind(
			Miz1 = miz1, 
			DeltaMycShare = log2ratio_myc,
			Miz1_DeltaMycShare = lm2$fitted.values,
			MycMizRatio_DeltaMycShare = lm4$fitted.values
			)

		response <- log2ratio_rna>0

		# empty canvas with a reversed specificity axis and the diagonal
		plot( 0, 0, pch='', xlim=c(1,0), ylim=c(0,1), 
			xlab = 'Specificity', ylab='Sensitivity'
			)
		abline(1,-1,col='grey')
		curveCols <- c('black','darkgoldenrod2','red','blue')
		aucs <- vapply( seq_len(ncol(predictors)), function(j) {
			rc <- roc(response, predictors[,j])
			lines(rc, col=curveCols[j], lty=1)
			as.numeric(auc(rc))
			}, numeric(1))
		legend('bottomright', 
			legend = paste(colnames(predictors), round(aucs,2), sep=' = '), 
			col = curveCols, lty = 1, bty = 'n'
			)
	})

}

# Panels E-F: MycMiz1Plot() draws two plots per call, so the three systems
# fill a 3 x 2 grid, one row each.
pdf(file.path('figures','S1EF.pdf'), height=9, width=6)

par(mfrow=c(3,2))

for (sys in c('Waltz2014','MycER','EuMyc')) {
	MycMiz1Plot(
		rnaseq = allsysRNA[[sys]],
		mycChip = allMycChIP[[sys]]$tss,
		# exact = FALSE keeps the partial-matching lookup that `$` performs
		mizChip = allMiz1ChIP[[sys, exact = FALSE]]$tss
		)
}

dev.off()

###############
## panel G #######
################

print('panel G')

tet <- read.table(file.path('tables','liver_tetMyc_RNAseq.xls'), header=TRUE)
# drop genes with T_average equal to zero before taking log2(T_average)
tet <- tet[tet$T_average != 0, ]

# log2( Tvd / T ) for all genes, and for the genes flagged 'yes' in
# Myc_dep_UP / Myc_dep_DN (which() drops NA flags, as subset() would)
allgen <- log2(tet$Tvd_average) - log2(tet$T_average)
dep_up <- allgen[which(tet$Myc_dep_UP == 'yes')]
dep_dn <- allgen[which(tet$Myc_dep_DN == 'yes')]

# All three densities are evaluated on one shared grid spanning the
# 1%-99% quantiles of the all-genes distribution. The grid length is passed
# to density() explicitly so x_bins and the density values always line up.
p_min <- 1e-2; p_max <- 1 - p_min
n_grid <- 512
x_range <- quantile(allgen, c(p_min, p_max))
x_bins <- seq( x_range[1] , x_range[2] , length.out = n_grid)
densityOnGrid <- function(values) {
	density(values, n = n_grid, from = x_range[1], to = x_range[2])$y
}
y_dep_up <- densityOnGrid(dep_up)
y_dep_dn <- densityOnGrid(dep_dn)
y_allgen <- densityOnGrid(allgen)

pdf(file.path('figures','S1G.pdf'), height= 4 , width = 4)

matplot(x_bins, cbind(y_allgen, y_dep_dn, y_dep_up), 
	type='l', lty=c(1,1,1), lwd = c(1,2,2),
	xlab = 'log2( Tvd / T )' , ylab = 'Density', 
	col = c('black','chartreuse3','darkgoldenrod2')
	)
abline(v=0, lty=3)

# Legend line types and widths mirror the curves drawn above (all solid;
# thin for all genes, thick for the two gene sets).
# NOTE(review): the p-values in the labels are hard-coded text; the actual
# tests are only printed at the end of this section.
legend('topright', legend = c('primary up\n(p < 2.2e-16)', 'all genes', 'primary down\n(p < 2.2e-16)'),
	lty = c(1, 1, 1), lwd = c(2, 1, 2),
	col = c('darkgoldenrod2' , 'black' , 'chartreuse3') , bty = 'n', cex = .7)

dev.off()

print(wilcox.test(allgen, dep_up, alternative = 'greater'))
print(wilcox.test(allgen, dep_dn, alternative = 'less'))


