#!/usr/bin/perl
use Time::Local;
use Term::ANSIColor; 

# Start from a clean working directory: delete every .bed and .narrowPeak
# file found here, show what is left, then pause so the listing can be read.
# WARNING: the globs match ANY such file in the current directory, not only
# files written by a previous run of this script.
system ("rm *.bed *.narrowPeak");
system ("ls -l");
sleep 3;

########## CREATE PROMOTER BED TO GET PROMOTER DISTAL SITES ##########
# Writes $PROMOTER_REGIONS: one BED record per line of the refGene table,
# streamed through `bedtools sort`.
#   '+' strand : gene left edge  (field 5 of the table) +/- 1250 bp
#   '-' strand : gene right edge (field 6 of the table) +/- 1250 bp
#   otherwise  : the whole gene, extended by 1250 bp on each side
# BED columns written: chromosome, start, end, gene name (field 13), "NA", strand.
# Dies if the table cannot be read or if `bedtools sort` fails.
# NOTE(review): no later step visible in this file reads $PROMOTER_REGIONS --
# confirm whether it was meant to be passed to the peak filter below.
$REFGENE_MM9="/mnt/promoter1/home/saurabh/LSD1_GENOME_RESEARCH/STOCK_FILES/refGene_mm9.txt";
$PROMOTER_REGIONS="1250bp_AROUND_PROMOTERS.bed";

my $promoter_flank = 1250;

open (REFGENE, "<", $REFGENE_MM9)
	or die "Cannot open $REFGENE_MM9 for reading: $!\n";
open (PROMOTER_BED, "|-", "bedtools sort -i - > $PROMOTER_REGIONS")
	or die "Cannot start 'bedtools sort' for $PROMOTER_REGIONS: $!\n";

while (my $line = <REFGENE>)
{
	chomp $line;

	# Blank lines and '#'-prefixed header lines carry no gene record; passing
	# them on would produce a malformed BED line.
	next if $line =~ /^\s*$/ || $line =~ /^#/;

	my @info = split(/\t/, $line);
	my ($chromosome, $gene_strand, $gene_left, $gene_right) = @info[2 .. 5];
	my $gene_name = $info[12];

	my ($promoter_left, $promoter_right);
	if ($gene_strand eq '+')
	{
		$promoter_left  = $gene_left - $promoter_flank;
		$promoter_right = $gene_left + $promoter_flank;
	}
	elsif ($gene_strand eq '-')
	{
		$promoter_left  = $gene_right - $promoter_flank;
		$promoter_right = $gene_right + $promoter_flank;
	}
	else
	{
		# Unknown strand: cover the whole gene plus the flank on both sides.
		$promoter_left  = $gene_left - $promoter_flank;
		$promoter_right = $gene_right + $promoter_flank;
	}

	# A gene closer than the flank to the chromosome start would yield a
	# negative start, which is not a valid BED coordinate.
	$promoter_left = 0 if $promoter_left < 0;

	print PROMOTER_BED "$chromosome\t$promoter_left\t$promoter_right\t$gene_name\tNA\t$gene_strand\n";
}
close (REFGENE);
# Closing a pipe handle waits for the child; a false return means that
# bedtools (or the shell running it) exited with a non-zero status.
close (PROMOTER_BED)
	or die "'bedtools sort' failed while writing $PROMOTER_REGIONS (status $?, error: $!)\n";
system ("wc -l $REFGENE_MM9 $PROMOTER_REGIONS");
sleep 3;

############### MERGE THE 0h AND 24h H3K4me2 PEAKS INTO ONE PEAK SET ################################
# Concatenates the two narrowPeak files, sorts them and merges intervals with
# `bedtools merge -d $MERGE_LENGTH`, then writes $MERGED_PEAK_FILE as 6-column
# BED. Each merged interval gets a running number and a name of the form
#   H3K4me2_Peak_<n>:::<chr>:::<left>:::<right>
# with a fixed score of 255 and strand '.'.
# Dies if the pipeline or the output file fails, or if no peak is produced.
$MERGE_LENGTH = 250;
$PEAK_FOLDER = "/mnt/promoter1/home/saurabh/LSD1_GENOME_RESEARCH/PEAK_CALLING";
$PEAKS_FILES = "$PEAK_FOLDER/H3K4me2_0h_mm9_q0.05_peaks.narrowPeak $PEAK_FOLDER/H3K4me2_24h_mm9_q0.05_peaks.narrowPeak";
$MERGED_PEAK_FILE = "Merged_H3K4me2_Peaks_0h_24h_q0.05_narrowPeaks_$MERGE_LENGTH.bed";

open (MACS2_PEAKS, "-|", "cat $PEAKS_FILES |bedtools sort -i stdin |bedtools merge -d $MERGE_LENGTH -i stdin")
	or die "Cannot start the peak merging pipeline: $!\n";
open (PEAK_BED_FILE, ">", $MERGED_PEAK_FILE)
	or die "Cannot open $MERGED_PEAK_FILE for writing: $!\n";
$PEAK_NUM=0;

while (my $line = <MACS2_PEAKS>)
{
	chomp $line;
	my ($chr, $left, $right) = split(/\t/, $line);

	$PEAK_NUM++;
	# join() avoids writing "$PEAK_NUM:::" inside a string, where the "::"
	# would be parsed as a package separator.
	my $name = join(":::", "H3K4me2_Peak_$PEAK_NUM", $chr, $left, $right);
	print PEAK_BED_FILE "$chr\t$left\t$right\t$name\t255\t.\n";
}
# The shell pipeline reports only the status of its last command, so also
# require at least one peak below to catch e.g. missing input files.
close (MACS2_PEAKS)
	or die "Peak merging pipeline failed (status $?, error: $!)\n";
close (PEAK_BED_FILE)
	or die "Cannot finish writing $MERGED_PEAK_FILE: $!\n";

$PEAK_NUM > 0
	or die "No merged peaks were produced from: $PEAKS_FILES\n";

system ("wc -l $MERGED_PEAK_FILE");

############### REMOVE rRNA AND BLACKLISTED REGIONS FROM THE MERGED PEAKS ################################
# Writes $PDHPFC: the merged peaks that overlap neither the single interval in
# RN45s.bed nor any interval in $BLACKLISTED_REGIONS (`bedtools intersect -v`).
# Dies if RN45s.bed cannot be written or if bedtools fails, because every
# later step reads $PDHPFC.
# NOTE(review): despite the original "PROMOTER DISTAL" heading, promoter
# regions are NOT excluded here -- confirm that this is intended.
$RRNA = "RN45s.bed";
$BLACKLISTED_REGIONS="/mnt/promoter1/home/saurabh/LSD1_GENOME_RESEARCH/STOCK_FILES/BLACKLISTED_REGIONS.bed";

# Single hard-coded interval to exclude; presumably the Rn45s rRNA locus in
# mm9, going by the file name -- TODO confirm the coordinates.
open (RRNA_BED, ">", $RRNA)
	or die "Cannot open $RRNA for writing: $!\n";
print RRNA_BED "chr17\t39979942\t39985774\n";
close (RRNA_BED)
	or die "Cannot finish writing $RRNA: $!\n";

$PDHPFC = "Valid_$MERGED_PEAK_FILE"; #H3K4me2_PEAKS_FOR_COVERAGE -rRNA - Blacklisted Regions

system("bedtools intersect -wa -v -a $MERGED_PEAK_FILE -b $RRNA $BLACKLISTED_REGIONS > $PDHPFC") == 0
	or die "'bedtools intersect' failed while writing $PDHPFC (status $?)\n";

system ("wc -l *.bed");

##################### CREATE BED FILES FOR VISUALIZING Merged H3K4me2 sites on the browser ##########################
# Prepends a "track" header line to $PDHPFC and gzips the result into
# $PDHPFC.gz. The header is staged in BED_HEADER_1.txt, which is left on disk.
# Dies if the header file cannot be written or if the cat|gzip step fails.
$BED_HEADER_1 = "track type=bed name=\"$PDHPFC\" description=\"$PDHPFC\" color=200,0,0";
open (HEADER, ">", "BED_HEADER_1.txt")
	or die "Cannot open BED_HEADER_1.txt for writing: $!\n";
print HEADER "$BED_HEADER_1\n";
close (HEADER)
	or die "Cannot finish writing BED_HEADER_1.txt: $!\n";
system("cat BED_HEADER_1.txt $PDHPFC |gzip > $PDHPFC.gz") == 0
	or die "Could not create $PDHPFC.gz (status $?)\n";
sleep 5;


##################### NOW CALCULATING COVERAGE ON THE VALID MERGED H3K4me2 PEAKS ##########################
# For every sample, runs `bedtools coverage` of the sample's extended-read BED
# against the peaks in $PDHPFC, keeps the peak ID and the first column that
# bedtools appends (columns 4 and 7), and finally pastes all per-sample tables
# side by side into ALL_READ_COUNTS_at_H3K4me2_Peaks.counts.gz.
# Dies as soon as one step fails: a missing or truncated per-sample table
# would silently misalign the pasted rows.
$BED_FOLDER="/mnt/promoter1/home/saurabh/LSD1_GENOME_RESEARCH/EXTENDED_BIGWIGS";

@SAMPLES=qw(
H3K4me2_0h 
H3K4me2_24h 
LSD1_0h 
LSD1_24h 
MLL4_0h 
MLL4_24h 
Input 
);

################## CALCULATING AVERAGE BASE COVERAGE ##################
$LIST_OF_FILES_1="";	# space-separated list of the per-sample .counts files
$LIST_OF_FILES_2="";	# not used anywhere in the visible code; kept for compatibility
for my $s (0 .. $#SAMPLES)
{
	my $mark = $SAMPLES[$s];
	print color ("yellow"), "##### $s Now Calculating Coverage for $mark\n",color ("reset");

	my $extended_bed_file = "$BED_FOLDER/Extended_${mark}_50bp_mm9.bed.gz";

	# -F 0.5 is the bedtools minimum-overlap fraction for the -b records
	# (here: the reads) -- see the bedtools coverage documentation.
	system("gzip -cd $extended_bed_file | /usr/local/bin/bedtools coverage -F 0.5 -a $PDHPFC -b stdin > $mark.coverage") == 0
		or die "'bedtools coverage' failed for $mark (status $?)\n";

	# One header line per sample so the pasted table is self-describing.
	open (HEADER_FILE, ">", "$mark.header")
		or die "Cannot open $mark.header for writing: $!\n";
	print HEADER_FILE "CHR\tLEFT\tRIGHT\tID\tSCORE\tSTRAND\t$mark\n";
	close (HEADER_FILE)
		or die "Cannot finish writing $mark.header: $!\n";

	system("cat $mark.header $mark.coverage | cut -f 4,7 > $mark.counts") == 0
		or die "Could not create $mark.counts (status $?)\n";
	system("head $mark.counts");	# quick visual sanity check

	$LIST_OF_FILES_1 .= " $mark.counts";
}
system("paste $LIST_OF_FILES_1 |gzip > ALL_READ_COUNTS_at_H3K4me2_Peaks.counts.gz") == 0
	or die "Could not create ALL_READ_COUNTS_at_H3K4me2_Peaks.counts.gz (status $?)\n";
system ("rm *.header");

print color("green"),"Coverage Calculation Done for all\nCoverage Calculation Done for all\nCoverage Calculation Done for all\n",color("reset");
# The original printed $LIST_OF_FILES, which is never assigned, so nothing was
# shown; report the files that were actually pasted together.
print "$LIST_OF_FILES_1\n";




