## Draw the mean absolute signal of one bigWig track around two sets of
## regions on the current graphics device.
##
## Arguments:
##   bed1, bed2     Data frames of regions; column 1 (chromosome) and column 5
##                  (position the window is centred on) are used. bed1 is drawn
##                  in blue, bed2 in black. A set with zero rows is skipped.
##   file.plus.bw, file.minus.bw, subs, breaks, cols
##                  Accepted for call compatibility; not used in the body.
##   hMarkFile      Path of the bigWig file to profile.
##   title          Main title of the panel.
##   dist           Half-width of the window around each centre (bp).
##   step           Bin size handed to metaprofile.bigWig (bp).
##
## Returns NULL invisibly. The par() settings made here stay in effect after
## the call; they are not restored.
writeMatplot<- function(bed1, bed2, file.plus.bw, file.minus.bw, hMarkFile, title, subs= NULL, breaks= NULL,
							        cols= NULL, dist= 5000, step=25)
{
	library(bigWig);

	## Load mark. The handle is released when the function exits, including
	## when a query or plotting call fails.
	hMark <- load.bigWig(hMarkFile)
	on.exit(unload.bigWig(hMark), add=TRUE);

	## Absolute value of the "middle" meta-profile for one region set,
	## or NULL when the set is empty.
	profileOf <- function(bed)
	{
		if(NROW(bed)==0) return(NULL);
		hmat <- metaprofile.bigWig(center.bed(bed[,c(1,5,5)], dist, dist), hMark, step=step)
		abs(hmat$middle)
	}

	mat1 <- profileOf(bed1);
	mat2 <- profileOf(bed2);

	par( mar=c(5,4,2,2), plt=c(0.15, 0.99, 0.2, 0.8), mgp=c(1.5,0.4,0) );

	if(!is.null(mat1) || !is.null(mat2))
	{
		y.max <- max(c(mat1, mat2))
	} else {
		y.max <- 10;
	}

	## Number of points along x. Taken from the data so that the tick marks
	## stay correct for any dist/step; when there is nothing to draw fall back
	## to the window width in steps so the empty frame still has a sane axis.
	if(!is.null(mat1))
	{
		n.pts <- NROW(mat1)
	} else if(!is.null(mat2)) {
		n.pts <- NROW(mat2)
	} else {
		n.pts <- round(2*dist/step)
	}

	## Values are drawn divided by 1000; the y axis labels below undo that scaling.
	plot(NA, NA, type="n", col="gray", xlim=c(1,n.pts), ylim=c(0, y.max)/1000,  xlab="Distance (Kbp) ", ylab="", cex.axis=1/3, cex.lab=1/3,  cex.main=1/3, xaxt = "n", yaxt="n", main=title)

	if(!is.null(mat1)) lines(1:NROW(mat1), mat1/1000, col="blue", lwd=1/8);
	if(!is.null(mat2)) lines(1:NROW(mat2), mat2/1000, col="black", lwd=1/8);

	## Five evenly spaced ticks from -dist to +dist (labelled in Kbp).
	## For the default dist=5000, step=25 these sit at 0, 100, ..., 400.
	axis(1, seq(0, n.pts, length.out=5), c(-dist/1000, -dist/2000, 0, dist/2000, dist/1000), cex.axis=1/3, cex=1/3 )

	## Label precision depends on the magnitude of the maximum.
	if( y.max>10 )
	{
		y.labels <- c(0, round(y.max/2), round(y.max))
	} else if( y.max>=1 ) {
		y.labels <- c(0, round(y.max/2, digits=1), round(y.max,digits=1))
	} else {
		y.labels <- c(0, signif(y.max/2,digits=2), signif(y.max,digits=2))
	}
	axis(2, c(0, y.max/2000, y.max/1000), y.labels, cex.axis=1/3, cex=1/3 )

	invisible(NULL)
}


## ---- Input files -----------------------------------------------------------

## Stranded signal tracks (paths relative to the working directory).
file.plus.bw  <- "../k562/K562_unt.sort.bed.gz_plus.bw"
file.minus.bw <- "../k562/K562_unt.sort.bed.gz_minus.bw"

## dREG peak calls (relative to the working directory).
file.G1 <- "../new-rf-201803/G1/G1.dREG.peak.full.bed.gz"

## Clustered TF ChIP-seq peak calls.
file.TF.chipseq <- "/fs/cbsudanko/storage/data/hg19/all/ENCODE_tf_peak_calls/wgEncodeRegTfbsClusteredWithCellsV3.bed.gz"

## DNase-I: signal track and peak calls.
file.dnase.bw   <- "/fs/cbsudanko/storage/data/hg19/k562/dnase/wgEncodeOpenChromDnaseK562SigV2.bigWig"
file.dnase.peak <- "/fs/cbsudanko/storage/data/hg19/k562/dnase/wgEncodeOpenChromDnaseK562PkV2.narrowPeak.gz"

## DHS peak sets combined by DHS.conservative()/DHS.lessconservative().
## The Duke set is the same file as file.dnase.peak.
file.DHS.Duke <- file.dnase.peak
file.DHS.UW   <- "/fs/cbsudanko/storage/data/hg19/k562/dnase/GSM646567_hg19_wgEncodeUwDgfK562Pk.narrowPeak.txt.gz"

## MNase signal track.
file.MNase.bw <- "/fs/cbsudanko/storage/data/hg19/k562/sydh_mnase/wgEncodeSydhNsomeK562Sig.bigWig"

## Histone marks: signal tracks.
file.H3K27ac.bw  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k27acStdSig.bigWig"
file.H3K27me3.bw <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k27me3StdSig.bigWig"
file.H3K4me1.bw  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k4me1StdSig.bigWig"
file.H3K4me3.bw  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k4me3StdSig.bigWig"

## Histone marks: peak files.
## NOTE(review): the H3K4me1 and H3K27ac entries end in "StdAln.bed.gz" while
## the other two end in "StdPk.broadPeak.gz" -- confirm these are the intended
## peak inputs.
file.H3K27ac.peak  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k27acStdAln.bed.gz"
file.H3K27me3.peak <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k27me3StdPk.broadPeak.gz"
file.H3K4me1.peak  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k4me1StdAln.bed.gz"
file.H3K4me3.peak  <- "/fs/cbsudanko/storage/data/hg19/k562/histones/wgEncodeBroadHistoneK562H3k4me3StdPk.broadPeak.gz"

## Build the "conservative" DHS set: the parts of the Duke peaks that are
## also covered by a UW peak (bedtools intersect -a Duke -b UW).
##
## Reads the global paths file.DHS.Duke and file.DHS.UW, and needs `bedtools`
## on the PATH. Returns the data frame read from the bedtools output.
## The intermediate BED files are removed when the function exits.
DHS.conservative<-function()
{
	#tb.duke <- read.table(pipe(paste("bedtools intersect -a <(zcat ", file.DHS.Duke, ") -b <(zcat ", file.DHS.UW, ") -wa", sep="")));
	#tb.uw <- read.table(pipe(paste("bedtools intersect -b <(zcat ", file.DHS.Duke, ") -a <( zcat ", file.DHS.UW, ") -wa", sep="")));

	file.temp.duke <- tempfile(fileext=".bed");
	file.temp.uw <- tempfile(fileext=".bed");
	## Clean up the intermediates on every exit path (including errors).
	on.exit(unlink(c(file.temp.duke, file.temp.uw)), add=TRUE);

	## Keep only chrom/start/end so both inputs are plain 3-column BED.
	tb.duke <- read.table(file.DHS.Duke);
	tb.uw <- read.table(file.DHS.UW);
	write.table(tb.duke[,1:3], file=file.temp.duke, quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t");
	write.table(tb.uw[,1:3], file=file.temp.uw, quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t");

	#tb.ud1 <- read.table(pipe(paste("bedtools intersect -a ",file.temp.duke," -b ",file.temp.uw," -wa", sep="")));
	#tb.ud2 <- read.table(pipe(paste("bedtools intersect -b ",file.temp.duke," -a ",file.temp.uw," -wa", sep="")));
	#tb <- rbind(tb.ud1[,c(1:3)], tb.ud2[,c(1:3)] );
	#write.table(tb, file=file.temp.dhs, quote=F, col.names=F, row.names=F, sep="\t");
	#tb <- read.table(pipe(paste("sort-bed ", file.temp.dhs, " | bedtools merge  -i -")));

	## read.table() opens the pipe, reads all output and closes it again,
	## so the temp files are no longer needed once it returns.
	tb <- read.table(pipe(paste("bedtools intersect -a ",file.temp.duke," -b ",file.temp.uw, sep="")));

	return(tb);
}


## Build the "less conservative" DHS set: the union of the Duke and UW peaks,
## sorted with `sort-bed` and collapsed with `bedtools merge`.
##
## Reads the global paths file.DHS.Duke and file.DHS.UW, and needs `sort-bed`
## and `bedtools` on the PATH. Returns the data frame read from the merge
## output. The intermediate BED file is removed when the function exits.
DHS.lessconservative<-function()
{
	tb.duke <- read.table(file.DHS.Duke);
	tb.uw <- read.table(file.DHS.UW);

	file.temp.dhs <- tempfile(fileext=".bed");
	## Clean up the intermediate on every exit path (including errors).
	on.exit(unlink(file.temp.dhs), add=TRUE);

	## Stack chrom/start/end of both peak sets into one file.
	tb <- rbind(tb.duke[,c(1:3)], tb.uw[,c(1:3)] );
	write.table(tb, file=file.temp.dhs, quote=FALSE, col.names=FALSE, row.names=FALSE, sep="\t");

	tb <- read.table(pipe(paste("sort-bed ", file.temp.dhs," | bedtools merge  -i -")));
	return(tb);
}

## dREG peaks (file.G1) that overlap both a K562 TF ChIP-seq peak and a
## conservative DHS (see DHS.conservative()).
##
## Both overlaps are left outer joins (-loj); rows where the TF fields (V7)
## or the DHS fields (V11) come back as "." had no overlap and are dropped.
## Returns the unique rows of columns 1-4, 6 and 10 of the joined table.
## Needs `zcat`, `grep`, `cut` and `bedtools` on the PATH.
get_DDplus<-function()
{
	tb.dhs <- DHS.conservative()
	file.temp.DHS <- tempfile(fileext=".bed");
	## Remove the intermediate DHS file on every exit path.
	on.exit(unlink(file.temp.DHS), add=TRUE);
	## A constant 4th column is appended to the DHS intervals.
	write.table(data.frame(unique(tb.dhs[,1:3]),1), file=file.temp.DHS, quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")

	tb.dtd <- read.table(pipe(paste("zcat ", file.TF.chipseq, " | grep K562 | cut -f 1,2,3,4 | bedtools intersect -a ", file.G1, " -b - -loj", " | bedtools intersect -a - ", " -b ", file.temp.DHS, " -loj" )));
	tb.dtd <- tb.dtd[tb.dtd$V7!="." & tb.dtd$V11!=".",]
	tb.dtd <- unique(tb.dtd[,c(1:3,4,6,10)]);

	return(tb.dtd);
}

## dREG peaks (file.G1) that overlap a K562 TF ChIP-seq peak but do NOT
## overlap any DHS of the less conservative set (see DHS.lessconservative()).
##
## Two tables are built with the same columns (1-4, 6, 10 of the join):
##   tb.dtd  peak/TF rows that also overlap a DHS,
##   tb.dt   all peak/TF rows.
## The result is tb.dt minus every row whose (chr, start, end, TF) key
## occurs in tb.dtd. Needs `zcat`, `grep`, `cut` and `bedtools` on the PATH.
get_DDminus<-function()
{
	tb.dhs <- DHS.lessconservative()

	file.temp.DHS <- tempfile(fileext=".bed");
	## Remove the intermediate DHS file on every exit path.
	on.exit(unlink(file.temp.DHS), add=TRUE);
	write.table(data.frame(unique(tb.dhs[,1:3]),1), file=file.temp.DHS, quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")

	tb.dtd <- read.table(pipe(paste("zcat ", file.TF.chipseq, " | grep K562 | cut -f 1,2,3,4 | bedtools intersect -a ", file.G1, " -b - -loj", " | bedtools intersect -a - ", " -b ", file.temp.DHS, " -loj" )));
	tb.dtd <- tb.dtd[tb.dtd$V7!="." & tb.dtd$V11!=".",]
	tb.dtd <- unique(tb.dtd[,c(1:3,4,6,10)]);

	tb.dt <- read.table(pipe(paste("zcat ", file.TF.chipseq, " | grep K562 | cut -f 1,2,3,4 | bedtools intersect -a ", file.G1, " -b - -loj" )));
	tb.dt <- tb.dt[tb.dt$V7!=".",]
	tb.dt <- unique(tb.dt[,c(1:3,4,6,10)]);

	## Drop the DHS-overlapping rows with a logical mask. A negative index
	## built from match() would return ZERO rows when nothing matches
	## (x[-integer(0), ] selects nothing), fail on an NA from an unmatched key,
	## and remove only the first row for each key.
	key.dtd <- paste(tb.dtd[,1], tb.dtd[,2], tb.dtd[,3], tb.dtd[,6], sep=":")
	key.dt  <- paste(tb.dt[,1], tb.dt[,2], tb.dt[,3], tb.dt[,6], sep=":")
	tb.dtminus <- tb.dt[!(key.dt %in% key.dtd),]

	return(tb.dtminus);
}

## Draw `n` distinct indices into `orig` such that the selected values of
## `orig` follow the binned distribution of `ref`.
##
## Arguments:
##   ref    Numeric vector whose distribution should be matched.
##   orig   Numeric vector to sample from (values are assumed non-negative,
##          since the bins start at 0).
##   n      Number of indices to draw, without replacement.
##   nbins  Number of equally spaced break points on [0, max(orig)];
##          this yields nbins-1 bins.
##
## Returns an integer vector of `n` positions in `orig`.
##
## Each bin is [lower, upper) except the last one, which also includes its
## upper edge, so that max(orig) is binned like every other value. Elements of
## `orig` that fall in no bin get weight 0 rather than keeping a placeholder
## weight of 1, which would dwarf the real weights.
resample <- function( ref, orig, n=10000, nbins=10) {
		stopifnot(nbins >= 2)

		## (1) Break the value range down into equally sized bins.
		breaks <- seq(0, max(orig), length.out=nbins)
		n.bins <- NROW(breaks)-1

		## Bin number of every value; 0 or n.bins+1 means "outside all bins".
		bin.ref  <- findInterval(ref,  breaks, rightmost.closed=TRUE)
		bin.orig <- findInterval(orig, breaks, rightmost.closed=TRUE)
		in.ref   <- which(bin.ref  >= 1 & bin.ref  <= n.bins)
		in.orig  <- which(bin.orig >= 1 & bin.orig <= n.bins)

		## (2) Get the empirical frequencies of each bin in the reference.
		empir <- tabulate(bin.ref[in.ref], nbins=n.bins)/NROW(ref)

		## (3) Weight every element of orig by the reference frequency of its
		##     bin, shared equally among the elements of orig in that bin.
		n.per.bin <- tabulate(bin.orig[in.orig], nbins=n.bins)
		resamp_prob <- rep(0, NROW(orig))
		resamp_prob[in.orig] <- empir[bin.orig[in.orig]] / n.per.bin[bin.orig[in.orig]]

		sample(seq_len(NROW(resamp_prob)), n, prob= resamp_prob, replace=FALSE)
}


## One resampling attempt: pick `n.sample` elements of `orig` matched to the
## distribution of `ref` (100 break points), then compare `ref` with the
## picked values using a Wilcoxon test.
## Returns list(pv = test p-value, index = picked positions in `orig`).
try.sample<-function( ref, orig, n.sample)
{
		picked <- resample( ref, orig, n=n.sample, nbins=100)
		matched <- orig[picked]
		test.result <- wilcox.test(ref, matched, conf.int=TRUE, conf.level=0.9 )
		list(pv=test.result$p.value, index=picked)
}


## Packages used by the analysis blocks below.
library(parallel)
library(vioplot)
library(boot)
library(bigWig)

## TF-bound dREG peaks inside (plus) and outside (minus) DNase-I
## hypersensitive sites.
DDplus <- get_DDplus()
DDminus <- get_DDminus()

## Fraction of rows used for training; the rest is held out for testing.
fold_cv <- 0.9

## Tail probability for the bootstrap quantiles (sl and 1-sl).
sl <- (1e-3) / 2


## DISABLED (if(0)): score-matched variant of the analysis.
## Picks score-matched subsets of DDplus/DDminus, builds a TF presence matrix
## for the selected peaks, fits logistic regressions predicting DHS status,
## draws ROC curves and bootstraps the regression coefficients.
if(0)
{
	## Unique peaks (columns 1-4) of each set.
    DDplus4 <- unique(DDplus[,c(1:4)])
    DDminus4 <- unique(DDminus[,c(1:4)])

	max.prob <- 0;
	idx.max.plus = c()
	idx.max.minus = c()
    n.sample=5000;

	## 100 attempts: draw n.sample minus peaks at random, resample plus peaks
	## to match their column-4 distribution, and keep the attempt with the
	## largest Wilcoxon p-value.
	## NOTE(review): the try.sample() call passes the literal 5000 rather than
	## the n.sample variable.
	for(i in 1:100)
	{
		idx.minus = sample(1:NROW(DDminus4))[1:n.sample]
		r.sample= try.sample(DDminus4[idx.minus, 4], DDplus4[,4], n.sample=5000);

		cat(r.sample$pv, mean(DDplus4[r.sample$index,4]), mean(DDminus4$V4[idx.minus]), "\n");

		if(r.sample$pv > max.prob)
		{
			idx.max.plus = r.sample$index;
			idx.max.minus = idx.minus;
			max.prob = r.sample$pv
		}
	}

	## Column-4 distributions of the two selected subsets.
	pdf("voiplots.pdf");
	vioplot(DDplus4[idx.max.plus,4], DDminus4$V4[idx.max.minus], names=c("Positive", "Negative"))
	dev.off();

	## "chr:start:end" keys of the selected peaks, used to pull the matching
	## rows (all TFs) back out of the full tables.
	reg.minus <- paste(DDminus4[idx.max.minus,1], DDminus4[idx.max.minus,2], DDminus4[idx.max.minus,3], sep=":");
	reg.plus  <- paste(DDplus4[idx.max.plus,1], DDplus4[idx.max.plus,2], DDplus4[idx.max.plus,3], sep=":");

	tb.dtminus <- DDminus[ paste(DDminus[,1], DDminus[,2], DDminus[,3], sep=":") %in% reg.minus,];
	tb.dtd <- DDplus[  paste(DDplus[,1], DDplus[,2], DDplus[,3], sep=":")   %in% reg.plus,];

show(dim(tb.dtminus));
show(dim(tb.dtd));

	## Write the selected peaks to a temporary BED file and left-outer-join
	## them with the K562 TF peaks; rows with "." in V5 had no TF overlap.
	tb.dREG <- unique(rbind(tb.dtminus[,1:4],tb.dtd[,1:4]));
	file.dREG.sample <- tempfile(fileext=".bed");
	write.table(tb.dREG, file=file.dREG.sample, quote=F, row.names=F, col.names=F, sep="\t");

	tbo <- read.table(pipe(paste("zcat ", file.TF.chipseq, " | grep K562 | bedtools intersect -a ", file.dREG.sample, " -b - -loj")));
	tbo <- tbo[tbo$V5!=".",];
	tbo <- tbo[,-c(9,10)];

	## One 0/1 indicator per distinct value of column 8 of the join.
	TF.names <- unique(tbo[,8]);
	TF.vec0 <- rep(0, NROW(TF.names));
	names(TF.vec0) <- TF.names;

	## Peak x TF matrix: 1 where the join has a row for that peak and TF.
	mat.TFs <-  do.call("rbind", mclapply( 1:NROW(tb.dREG), function(i){
	#mat.TFs <- do.call("rbind", mclapply( 1:200, function(i){
		idx <- which( tbo[,1]== as.character(tb.dREG[i,1]) & tbo[,2]==tb.dREG[i,2] & tbo[,3]==tb.dREG[i,3] );
		TF.vec <- TF.vec0;
		TF.vec[as.character(unique(tbo[idx, 8]))] <- 1;
		return(TF.vec);
	  }, mc.cores=30) );


	tb.dhs <- DHS.conservative()
	file.temp.DHS = tempfile(fileext=".bed");
	write.table(data.frame(unique(tb.dhs[,1:3]),1), file=file.temp.DHS, quote=F, row.names=F, col.names=F, sep="\t")

	## Entries of file.G1 that overlap a conservative DHS.
	tbh <- read.table(pipe(paste(" bedtools intersect -a ", file.G1, " -b ",  file.temp.DHS, "-wa")));
	## Response: TRUE when the peak has NO row in tbh.
	DHS.status <- unlist(mclapply(1:NROW(tb.dREG), function(i){
		idx <- which( tbh[,1] == as.character(tb.dREG[i,1]) & tbh[,2]==tb.dREG[i,2] & tbh[,3]==tb.dREG[i,3] );
		return( NROW(idx) == 0 );
	  }, mc.cores=30) );

	## Column layout of dreg.mat: chr, start, end, score, y, then the TF indicators.
	colnames(tb.dREG)<-c("chr", "start","end","score");
	dreg.mat <- data.frame( tb.dREG, y=DHS.status, mat.TFs);

	## Random split: a fold_cv fraction for training, the rest for testing.
	train <- sample(1:NROW(dreg.mat))[1:(NROW(dreg.mat)*fold_cv)]
	test  <- rep(TRUE, NROW(dreg.mat));
	test[train] <- FALSE;
	test <- which(test)

	## Now the regression.
	df <- dreg.mat[,c(4,5)];
	sm <- glm(y~score, family=binomial, data=df[train,])	## Strawman model.
	df <- dreg.mat[,-c(1:3)];
	tf <- glm(y~., family=binomial, data=df[train,])			## Model with score and all TF columns.

	## Predictions for the held-out rows (predict() default scale).
	df <- dreg.mat[,c(4,5)];
	scores_sm <- predict(sm, df[test,])
	df <- dreg.mat[,-c(1:3)];
	scores_tf <- predict(tf, df[test,])

	require(dREG)
	roc_sm <- logreg.roc.calc(dreg.mat$y[test], scores_sm)
	roc_tf <- logreg.roc.calc(dreg.mat$y[test], scores_tf)

	## NOTE(review): inside this braced block the two roc.auc() values are
	## computed but neither stored nor printed.
	roc.auc(roc_sm)
	# 0.5197671
	roc.auc(roc_tf)
	#0.9245187

	## Both ROC curves overlaid in one plot.
	pdf("roc.curve.pdf")
	roc.plot(roc_sm, xlim=c(0,1), ylim=c(0,1), col="dark gray")
	par(new = TRUE)
	roc.plot(roc_tf, xlim=c(0,1), ylim=c(0,1), col="black")
	dev.off();


	## Now use bootstrap to test the regression coefficients difference from 0.
	require(boot);
	## Disabled alternative: leave one randomly chosen column out per replicate.
	## NOTE(review): the data passed here has columns score, y, TFs..., so
	## 2:NCOL(a) includes the response column y -- verify before re-enabling.
	if(0)
	{
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   rc <- sample(c(2:NCOL(a)), 1)## Select a random column to leave out.
	   vals <- glm(y~., family=binomial, data=a[i,-1*rc])$coefficients
	   if(rc==NCOL(a)) {
	    ans  <- c(vals, NA)
	   } else {
	    ans  <- c(vals[1:(rc-1)], NA, vals[(rc):NROW(vals)])
	   }
	   names(ans) <- c("Intercept", colnames(a[2:NCOL(a)]))
	   return(ans)
	 },ncpus=30,parallel="multicore")
	}

	## Bootstrap of the full model: refit on each resample and keep all coefficients.
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   vals <- glm(y~., family=binomial, data=a[i,])$coefficients
		vals
	},ncpus=30,parallel="multicore")

	## Bootstrap standard error per coefficient, and a flag that is TRUE when
	## the sl and 1-sl quantiles of the replicates lie on the same side of 0.
	std.error <- sapply(1:NROW(bb$t0) , function(x) {sd(bb$t[,x], na.rm=TRUE)})
	sig <- sapply(1:NROW(bb$t0) , function(x) {!xor(quantile(bb$t[,x], sl, na.rm=TRUE)>0, quantile(bb$t[,x], 1-sl, na.rm=TRUE)>0)}) # quantiles at sl and 1-sl

	## NOTE(review): executes a script fetched from the network at run time;
	## drawBars()/drawBarsVertical() used below are presumably defined there.
	source("https://raw.githubusercontent.com/Danko-Lab/dREG/master/dREG_paper_analyses/train_svm/erna_regression/erna_drawbars.R");

	save(dreg.mat, bb, file="roc.boot.rdata");

	## Bar plots of all coefficients, then of the flagged ones only.
	pdf("roc.bot.pdf")
	drawBars(bb$t0, std.error, names(bb$t0))
	drawBars(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	drawBarsVertical(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	dev.off();

}

## DISABLED (if(0)): grid of meta-profiles, one row per signal track and one
## column per transcription factor, written to "metaplot.dnase.pdf".
## Each panel compares the TF's peaks outside DHS (first set) with its peaks
## inside DHS (second set); the panel title is "TF:<n outside>/<n inside>".
if(0)
{
	tb.dtminus <- DDminus
	tb.dtd <- DDplus

	## Signal tracks, one grid row each.
	BWs <- c(file.plus.bw, file.minus.bw, file.dnase.bw, file.MNase.bw,
			 file.H3K27ac.bw, file.H3K27me3.bw, file.H3K4me1.bw, file.H3K4me3.bw)

	## Transcription factors, one grid column each.
	TFs <- c("CEBPB", "POLR2A", "PML", "TAF1", "RPC155", "CHD1", "SMARCA4")

	pdf("metaplot.dnase.pdf")
	par(mfrow=c(NROW(BWs),NROW(TFs)))
	for(bw in BWs)
	{
		for(tf in TFs)
		{
			## Rows of each set bound by this TF (TF name is in column V10).
			bed.minus <- tb.dtminus[tb.dtminus$V10==tf, ]
			bed.plus  <- tb.dtd[tb.dtd$V10==tf, ]

			title0 <- paste(tf,":",NROW(bed.minus), "/", NROW(bed.plus), sep="")
			cat(title0,"\n")
			writeMatplot(bed.minus, bed.plus, file.plus.bw, file.minus.bw, bw,  title=title0, subs= NULL, breaks= NULL, cols= NULL, dist= 5000, step=25)
		}
	}
	dev.off()
}


## DISABLED (if(0)): analysis on all DDplus/DDminus peaks, with DHS status
## recomputed against the less conservative DHS set and the summed read
## signal of each peak added as a predictor.
if(0)
{
	tb.dtminus <- DDminus;
	tb.dtd <- DDplus;

	## Write the unique peaks to a temporary BED file and left-outer-join
	## them with the K562 TF peaks; rows with "." in V5 had no TF overlap.
	tb.dREG <- unique(rbind(tb.dtminus[,1:4],tb.dtd[,1:4]));
	file.dREG.sample <- tempfile(fileext=".bed");
	write.table(tb.dREG, file=file.dREG.sample, quote=F, row.names=F, col.names=F, sep="\t");

	tbo <- read.table(pipe(paste("zcat ", file.TF.chipseq, " | grep K562 | bedtools intersect -a ", file.dREG.sample, " -b - -loj")));
	tbo <- tbo[tbo$V5!=".",];
	tbo <- tbo[,-c(9,10)];

	## One 0/1 indicator per distinct value of column 8 of the join.
	TF.names <- unique(tbo[,8]);
	TF.vec0 <- rep(0, NROW(TF.names));
	names(TF.vec0) <- TF.names;

	## Peak x TF matrix: 1 where the join has a row for that peak and TF.
	mat.TFs <-  do.call("rbind", mclapply( 1:NROW(tb.dREG), function(i){
	#mat.TFs <- do.call("rbind", mclapply( 1:200, function(i){
		idx <- which( tbo[,1]== as.character(tb.dREG[i,1]) & tbo[,2]==tb.dREG[i,2] & tbo[,3]==tb.dREG[i,3] );
		TF.vec <- TF.vec0;
		TF.vec[as.character(unique(tbo[idx, 8]))] <- 1;
		return(TF.vec);
	  }, mc.cores=30) );


	tb.dhs <- DHS.lessconservative()
	file.temp.DHS = tempfile(fileext=".bed");
	write.table(data.frame(unique(tb.dhs[,1:3]),1), file=file.temp.DHS, quote=F, row.names=F, col.names=F, sep="\t")

	## Entries of file.G1 that overlap a DHS of the less conservative set.
	tbh <- read.table(pipe(paste(" bedtools intersect -a ", file.G1, " -b ",  file.temp.DHS, "-wa")));
	## Response: TRUE when the peak has NO row in tbh.
	DHS.status <- unlist(mclapply(1:NROW(tb.dREG), function(i){
		idx <- which( tbh[,1] == as.character(tb.dREG[i,1]) & tbh[,2]==tb.dREG[i,2] & tbh[,3]==tb.dREG[i,3] );
		return( NROW(idx) == 0 );
	  }, mc.cores=30) );

	colnames(tb.dREG)<-c("chr", "start","end","score");

	## Summed absolute signal per peak on each strand.
	## NOTE(review): bw.plus and bw.minus are not unloaded in this block.
	bw.plus <- load.bigWig(file.plus.bw);
	bw.minus <- load.bigWig(file.minus.bw)
	rc.plus <- bed.region.bpQuery.bigWig(bw.plus, tb.dREG, op = "sum", abs.value = TRUE);
	rc.minus <- bed.region.bpQuery.bigWig(bw.minus, tb.dREG, op = "sum", abs.value = TRUE);

	## Column layout of dreg.mat: chr, start, end, y, score, read, then the TF indicators.
	dreg.mat <- data.frame( tb.dREG[,c(1,2,3)], y=DHS.status, score=tb.dREG[,4], read=rc.plus+rc.minus, mat.TFs);

	## Random split: a fold_cv fraction for training, the rest for testing.
	train <- sample(1:NROW(dreg.mat))[1:(NROW(dreg.mat)*fold_cv)]
	test  <- rep(TRUE, NROW(dreg.mat));
	test[train] <- FALSE;
	test <- which(test)

	## Now the regression.
	df <- dreg.mat[,c(4,5)];
	sm <- glm(y~score, family=binomial, data=df[train,])	## y=score
	df <- dreg.mat[,-c(1:3)];
	tf <- glm(y~., family=binomial, data=df[train,])		## y=score+read+TFs

	## Predictions for the held-out rows (predict() default scale).
	df <- dreg.mat[,c(4,5)];
	scores_sm <- predict(sm, df[test,])
	df <- dreg.mat[,-c(1:3)];
	scores_tf <- predict(tf, df[test,])

	require(dREG)
	roc_sm <- logreg.roc.calc(dreg.mat$y[test], scores_sm);
	roc_tf <- logreg.roc.calc(dreg.mat$y[test], scores_tf);

	## NOTE(review): inside this braced block the two roc.auc() values are
	## computed but neither stored nor printed.
	roc.auc(roc_sm)
	# 0.8127694
	roc.auc(roc_tf)
	#0.9604431

	## Both ROC curves overlaid in one plot.
	pdf("roc.curve.pdf")
	roc.plot(roc_sm, xlim=c(0,1), ylim=c(0,1), col="dark gray")
	par(new = TRUE)
	roc.plot(roc_tf, xlim=c(0,1), ylim=c(0,1), col="black")
	dev.off();


## Bootstrap variant 1 (selected): each replicate leaves out one randomly
## chosen column from position 4 onwards (the TF indicators; columns 1-3 of
## the data are y, score and read) and records NA for that coefficient.
if(1)
{
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   rc <- sample(c(4:NCOL(a)), 1)## Select a random column to leave out.
	   vals <- glm(y~., family=binomial, data=a[i,-1*rc])$coefficients
	   if(rc==NCOL(a)) {
	    ans  <- c(vals, NA)
	   } else {
	    ans  <- c(vals[1:(rc-1)], NA, vals[(rc):NROW(vals)])
	   }
	   names(ans) <- c("Intercept", colnames(a[2:NCOL(a)]))
	   return(ans)
	 },ncpus=30,parallel="multicore")
}

## Bootstrap variant 2 (not selected): refit the full model on each resample.
if(0)
{
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   vals <- glm(y~., family=binomial, data=a[i,])$coefficients
		vals
	},ncpus=30,parallel="multicore")
}

	## Bootstrap standard error per coefficient, and a flag that is TRUE when
	## the sl and 1-sl quantiles of the replicates lie on the same side of 0.
	std.error <- sapply(1:NROW(bb$t0) , function(x) {sd(bb$t[,x], na.rm=TRUE)})
	sig <- sapply(1:NROW(bb$t0) , function(x) {!xor(quantile(bb$t[,x], sl, na.rm=TRUE)>0, quantile(bb$t[,x], 1-sl, na.rm=TRUE)>0)}) # quantiles at sl and 1-sl

	## NOTE(review): executes a script fetched from the network at run time;
	## drawBars()/drawBarsVertical() used below are presumably defined there.
	source("https://raw.githubusercontent.com/Danko-Lab/dREG/master/dREG_paper_analyses/train_svm/erna_regression/erna_drawbars.R");

	save(dreg.mat, bb, file="roc.all.boot.rdata");

	## Bar plots of all coefficients, then of the flagged ones only.
	pdf("roc.all.0001.boot.pdf")
	drawBars(bb$t0, std.error, names(bb$t0))
	drawBars(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	drawBarsVertical(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	dev.off();

}



## ACTIVE analysis: which TFs distinguish TF-bound dREG peaks outside DHS
## (DDminus, y=1) from those inside DHS (DDplus, y=0)?
##
## Steps: build a peak x TF indicator matrix, add the dREG score and the
## summed read signal, fit logistic regressions on a random training split,
## evaluate them by ROC on the held-out rows, then bootstrap the coefficients.
## Outputs: "roc.curve.pdf", "roc.all.boot.rdata", "roc.all.0001.boot.pdf".
if(1)
{
	tb.dtminus <- DDminus;
	tb.dtd <- DDplus;

	## One row per peak (columns 1-4) with its class label.
	tb.dREG <- rbind(data.frame(unique(tb.dtminus[,1:4]),y=1), data.frame(unique(tb.dtd[,1:4]), y=0));
	## All peak/TF rows; column 6 holds the TF name.
	tbo <- rbind(tb.dtminus, tb.dtd);

	TF.names <- unique(tbo[,6]);
	TF.vec0 <- rep(0, NROW(TF.names));
	names(TF.vec0) <- TF.names;

	## Peak x TF matrix: 1 where tbo has a row for that peak and TF.
	mat.TFs <-  do.call("rbind", mclapply( seq_len(NROW(tb.dREG)), function(i){
	#mat.TFs <- do.call("rbind", mclapply( 1:200, function(i){
		idx <- which( tbo[,1]== as.character(tb.dREG[i,1]) & tbo[,2]==tb.dREG[i,2] & tbo[,3]==tb.dREG[i,3] );
		TF.vec <- TF.vec0;
		TF.vec[as.character(unique(tbo[idx, 6]))] <- 1;
		return(TF.vec);
	  }, mc.cores=30) );


	colnames(tb.dREG)<-c("chr", "start","end","score", "y");

	## Summed absolute signal per peak on each strand. The bigWig handles are
	## released as soon as the two queries are done.
	bw.plus <- load.bigWig(file.plus.bw);
	bw.minus <- load.bigWig(file.minus.bw)
	rc.plus <- bed.region.bpQuery.bigWig(bw.plus, tb.dREG, op = "sum", abs.value = TRUE);
	rc.minus <- bed.region.bpQuery.bigWig(bw.minus, tb.dREG, op = "sum", abs.value = TRUE);
	unload.bigWig(bw.plus);
	unload.bigWig(bw.minus);

	## Column layout of dreg.mat: chr, start, end, y, score, read, then the TF indicators.
	dreg.mat <- data.frame( tb.dREG[,c(1,2,3,5,4)], read=rc.plus+rc.minus, mat.TFs);

	## Random split: a fold_cv fraction for training, the rest for testing.
	train <- sample(1:NROW(dreg.mat))[1:(NROW(dreg.mat)*fold_cv)]
	test  <- rep(TRUE, NROW(dreg.mat));
	test[train] <- FALSE;
	test <- which(test)

	## Now the regression.
	df <- dreg.mat[,c(4,5)];
	sm <- glm(y~score, family=binomial, data=df[train,])	## y=score
	df <- dreg.mat[,-c(1:3)];
	tf <- glm(y~., family=binomial, data=df[train,])		## y=score+read+TFs

	## Predictions for the held-out rows (predict() default scale).
	df <- dreg.mat[,c(4,5)];
	scores_sm <- predict(sm, df[test,])
	df <- dreg.mat[,-c(1:3)];
	scores_tf <- predict(tf, df[test,])

	## library() rather than require(): fail right here with a clear message
	## if dREG is missing, instead of later at the first dREG call.
	library(dREG)
	roc_sm <- logreg.roc.calc(dreg.mat$y[test], scores_sm);
	roc_tf <- logreg.roc.calc(dreg.mat$y[test], scores_tf);

	## Values inside a braced block are not auto-printed, so print the AUCs
	## explicitly; otherwise they are computed and silently dropped.
	print(roc.auc(roc_sm))
	# 0.8127694
	print(roc.auc(roc_tf))
	#0.9604431

	## Both ROC curves overlaid in one plot.
	pdf("roc.curve.pdf")
	roc.plot(roc_sm, xlim=c(0,1), ylim=c(0,1), col="dark gray")
	par(new = TRUE)
	roc.plot(roc_tf, xlim=c(0,1), ylim=c(0,1), col="black")
	dev.off();





## Bootstrap variant 1 (selected): each replicate leaves out one randomly
## chosen column from position 4 onwards (the TF indicators; columns 1-3 of
## the data are y, score and read) and records NA for that coefficient.
## NOTE(review): boot() also calls the statistic once on the original data to
## obtain bb$t0, so bb$t0 itself contains one NA at a random TF.
if(1)
{
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   rc <- sample(c(4:NCOL(a)), 1)## Select a random column to leave out.
	   vals <- glm(y~., family=binomial, data=a[i,-1*rc])$coefficients
	   if(rc==NCOL(a)) {
	    ans  <- c(vals, NA)
	   } else {
	    ans  <- c(vals[1:(rc-1)], NA, vals[(rc):NROW(vals)])
	   }
	   names(ans) <- c("Intercept", colnames(a[2:NCOL(a)]))
	   return(ans)
	 },ncpus=30,parallel="multicore")
}

## Bootstrap variant 2 (not selected): refit the full model on each resample.
if(0)
{
	bb <- boot(data= dreg.mat[,-c(1:3)], R= 1000, statistic= function(a, i) {
	   vals <- glm(y~., family=binomial, data=a[i,])$coefficients
		vals
	},ncpus=30,parallel="multicore")
}

	## Bootstrap standard error per coefficient, and a flag that is TRUE when
	## the sl and 1-sl quantiles of the replicates lie on the same side of 0.
	std.error <- sapply(1:NROW(bb$t0) , function(x) {sd(bb$t[,x], na.rm=TRUE)})
	sig <- sapply(1:NROW(bb$t0) , function(x) {!xor(quantile(bb$t[,x], sl, na.rm=TRUE)>0, quantile(bb$t[,x], 1-sl, na.rm=TRUE)>0)}) # quantiles at sl and 1-sl

	## NOTE(review): this executes a script fetched from the network at run
	## time; consider pinning it to a commit or keeping a local copy.
	## drawBars()/drawBarsVertical() used below are presumably defined there.
	source("https://raw.githubusercontent.com/Danko-Lab/dREG/master/dREG_paper_analyses/train_svm/erna_regression/erna_drawbars.R");

	save(dreg.mat, bb, file="roc.all.boot.rdata");

	## Bar plots of all coefficients, then of the flagged ones only.
	pdf("roc.all.0001.boot.pdf")
	drawBars(bb$t0, std.error, names(bb$t0))
	drawBars(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	drawBarsVertical(bb$t0[sig], std.error[sig], names(bb$t0)[sig])
	dev.off();

}