#!/usr/bin/perl
use Time::Local;
use Term::ANSIColor;
use List::Util qw[min max];
# Start from a clean working directory: remove the outputs of a previous run,
# list what is left and pause for three seconds so the listing can be read.
system ("rm *.REGIONS; rm *.bed *.narrowPeak; ls -l; sleep 3");
##########################################
# Build MOUSE_GENOME.bed (one "chr<TAB>1<TAB>size" line per chromosome) and the
# %CHROMOSOME_SIZE lookup from the mm9 chrom.sizes table.  Every line that
# contains "hap", "random", "chrM" or "chrY" is skipped.
$MOUSE_GENOME="MOUSE_GENOME.bed";
{
	my $chrom_sizes_file="/scratch/iwase-lab/SAURABH/GENOMES/MM9/mm9.chrom.sizes";
	open (my $chrom_sizes_fh, '<', $chrom_sizes_file) or die "Cannot read $chrom_sizes_file: $!\n";
	open (my $genome_fh, '>', $MOUSE_GENOME) or die "Cannot write $MOUSE_GENOME: $!\n";

	while (my $line = <$chrom_sizes_fh>)
	{
		# Whole-line filter, equivalent to a chain of "grep -v" calls.
		next if $line =~ /hap|random|chrM|chrY/;
		chomp $line;
		my ($chr,$size)=split("\t", $line);
		$CHROMOSOME_SIZE{$chr}=$size;
		print "$chr\t1\t$size\n";
		print $genome_fh "$chr\t1\t$size\n";
	}
	close $chrom_sizes_fh;
	close $genome_fh or die "Cannot finish writing $MOUSE_GENOME: $!\n";

	# Everything downstream subtracts from this genome file, so an empty
	# table would silently invalidate every later result.
	die "No chromosome sizes were read from $chrom_sizes_file\n" unless %CHROMOSOME_SIZE;
}
###############################################
# Write the single interval chr17:39979942-39985774 to RN45s.bed.
# NOTE(review): nothing else in this script reads RN45s.bed; presumably it
# is consumed by a later step outside this file.
{
	my $rrna_bed="RN45s.bed";
	open (my $rrna_fh, '>', $rrna_bed) or die "Cannot write $rrna_bed: $!\n";
	print $rrna_fh "chr17\t39979942\t39985774\n";
	close ($rrna_fh) or die "Cannot finish writing $rrna_bed: $!\n";
}
###############################################
# Directory holding the BEDTools executables, and the full path of each of
# the four tools this script shells out to.
$BEDTOOLS="/home/saurabha/UTILITIES/BEDTOOLS/BEDTOOLS_2/bin";
($sortBed,$intersectBed,$subtractBed,$mergeBed)=
	map { join('/', $BEDTOOLS, $_) } qw(sortBed intersectBed subtractBed mergeBed);
##########################################
# refGene annotation table for mm9, the source of all gene coordinates.
$REFGENE_MM9="/home/saurabha/UTILITIES/RNASEQc/refGene_mm9.txt";

# Derive promoter and gene-body intervals from the refGene table.
#
# Input lines containing "hap", "random", "chrM" or "chrY" anywhere are
# skipped, as are transcripts whose name (column 13) starts with "Mir".
# For every remaining transcript:
#   promoter  : strand-aware start +/- 1000 bp        -> PRE_PROMOTERS_1000.REGIONS
#   gene body : from 1000 bp inside the start to 1000 bp past the end,
#               clamped to [1, chromosome size]       -> PRE_GENIC_1000.REGIONS
# Both outputs are piped through the same filters and sortBed.
# NOTE(review): the filter is applied to whole refGene lines, so a gene name
# containing e.g. "hap" would also be dropped - confirm this is intended.
{
	open (my $refgene_fh, '<', $REFGENE_MM9) or die "Cannot read $REFGENE_MM9: $!\n";
	open (my $promoter_fh, '|-', "grep -v hap|grep -v random|grep -v chrM|grep -v chrY |$sortBed -i - > PRE_PROMOTERS_1000.REGIONS")
		or die "Cannot start the PRE_PROMOTERS_1000.REGIONS pipeline: $!\n";
	open (my $genic_fh, '|-', "grep -v hap|grep -v random|grep -v chrM|grep -v chrY |$sortBed -i - > PRE_GENIC_1000.REGIONS")
		or die "Cannot start the PRE_GENIC_1000.REGIONS pipeline: $!\n";

	NEXT_GENE:while (my $line = <$refgene_fh>)
	{
		next NEXT_GENE if $line =~ /hap|random|chrM|chrY/;
		chomp $line;
		my @info=split(/\t/, $line);
		my ($chromosome,$gene_strand,$gene_left,$gene_right)=@info[2..5];
		my $gene_name=$info[12];

		if (substr($gene_name,0,3) eq "Mir") {next NEXT_GENE;} #print "$gene_name\n";
#		if ((substr($gene_name,0,5) eq "Snora") || (substr($gene_name,0,5) eq "Snord")) {print "$gene_name\n";next NEXT_GENE;}

		# On the minus strand the transcript starts at the right-hand
		# coordinate, so start/end and the body offsets are mirrored.
		my ($promoter,$tts,$gene_body_left,$gene_body_right);
		if ($gene_strand eq '-')
		{
			$promoter=$gene_right; $tts=$gene_left;
			$gene_body_left=$tts-1000; $gene_body_right=$promoter-1000;
		}
		else
		{
			$promoter=$gene_left; $tts=$gene_right;
			$gene_body_left=$promoter+1000; $gene_body_right=$tts+1000;
		}

		my $promoter_left =$promoter-1000;
		my $promoter_right=$promoter+1000;
		# NOTE(review): promoter windows are deliberately left unclamped
		# (the two lines below were disabled by the author).
#		if ($PROMOTER_LEFT < 1) {$PROMOTER_LEFT=1;}
#		if ($PROMOTER_RIGHT > $CHROMOSOME_SIZE{$chromosome}) {$PROMOTER_RIGHT=$CHROMOSOME_SIZE{$chromosome};}
		print $promoter_fh "$chromosome\t$promoter_left\t$promoter_right\n";

		# Clamp the body to the chromosome; a body that collapses to
		# nothing (short transcripts) is not written.
		if ($gene_body_left < 1) {$gene_body_left=1;}
		if ($gene_body_right > $CHROMOSOME_SIZE{$chromosome}) {$gene_body_right=$CHROMOSOME_SIZE{$chromosome};}
		if ($gene_body_right > $gene_body_left) {print $genic_fh "$chromosome\t$gene_body_left\t$gene_body_right\n";}
	}
	close ($refgene_fh);
	# Closing a pipe waits for the pipeline and reports its exit status, so
	# a missing or failing sortBed is caught here instead of leaving an
	# empty or truncated region file behind.
	close ($promoter_fh) or die "PRE_PROMOTERS_1000.REGIONS pipeline failed: ".($! || "exit status $?")."\n";
	close ($genic_fh) or die "PRE_GENIC_1000.REGIONS pipeline failed: ".($! || "exit status $?")."\n";
}
system ("wc -l $REFGENE_MM9");
system ("wc -l *.REGIONS");

# Partition the genome into promoter, genic and intergenic regions:
#   1-2  merge overlapping promoter windows and gene bodies
#   3    genic      = merged gene bodies minus promoters
#   4    intergenic = genome minus promoters and gene bodies
#   5-6  recompute intergenic from the final genic file and diff the two
#   7    report line counts
# NOTE(review): steps 4 and 5 pass two files after -b; confirm the installed
# subtractBed accepts more than one -b file.
for my $command (
	"$mergeBed -i PRE_PROMOTERS_1000.REGIONS > PROMOTERS_1000.REGIONS",
	"$mergeBed -i PRE_GENIC_1000.REGIONS > MERGED_PRE_GENIC_1000.REGIONS",
	"$subtractBed -a MERGED_PRE_GENIC_1000.REGIONS -b PROMOTERS_1000.REGIONS > GENIC_1000.REGIONS",
	"$subtractBed -a $MOUSE_GENOME -b PROMOTERS_1000.REGIONS PRE_GENIC_1000.REGIONS > INTERGENIC_1000.REGIONS",
	"$subtractBed -a $MOUSE_GENOME -b PROMOTERS_1000.REGIONS GENIC_1000.REGIONS > INTERGENIC_1000_CHECK.REGIONS",
	"diff INTERGENIC_1000.REGIONS INTERGENIC_1000_CHECK.REGIONS",
	"wc -l *.REGIONS",
)
{
	system ($command);
}
# Block until a line arrives on standard input, so the output above can be
# inspected before the script carries on.
<STDIN>;
###############################################################
# Concatenate the three region files into REGIONS.bed, each preceded by a
# "track" header line that names the file and gives it an RGB colour.
@FILES=("PROMOTERS_1000.REGIONS","GENIC_1000.REGIONS","INTERGENIC_1000.REGIONS");
@COLORS=("0,255,0","0,0,255","255,0,0");
for my $track (0 .. $#FILES)
{
	my $region_file=$FILES[$track];
	my $header_file="HEADER_$track.txt";

	open (HEADERFILE, '>', $header_file);
	print HEADERFILE qq{track type=bed name="$region_file" description="$region_file" color="$COLORS[$track]"\n};
	close HEADERFILE;

	# The header must come immediately before the file it describes.
	$FILELIST.=" $header_file $region_file";
}
system ("cat $FILELIST > REGIONS.bed");
system ("wc -l REGIONS.bed");
sleep 3;
############### GETTING SUMMITS AND +/- 250 bp (500 bp wide) WINDOWS FOR CTCF, p300, DHS AND LSD1 PEAKS ##################
$CTCF_NARROWPEAKS="/mnt/iwase/AGARWAL/External_mES_Data_2/CTCF/MACS2_PEAKS/Merged_CTCF_3_replicates_SRR207081_SRR172853_SRR172854.nodup_p1e-12_peaks.narrowPeak";
$P300_NARROWPEAKS="/scratch/iwase-lab/SAURABH/INTERGENIC_ENHANCERS/MACS2_PEAKS/p300_Peaks/MACS2_p300/p300_123_replicates_Input_None_q0.01_peaks.narrowPeak";
$DHS_NARROWPEAKS="/mnt/iwase/AGARWAL/UW_DHS_DATA/ALL_MACS2_PEAKS/UW_mES_DHS_no_subpeaks_q_0.05_peaks.narrowPeak";
$LSD1_NARROWPEAKS="/scratch/iwase-lab/SAURABH/INTERGENIC_ENHANCERS/MACS2_PEAKS/LSD1_Peaks/LSD1_mES_ChipSeq_2_Young_SRR122470_19_bw1_mm9_Input_Merged_All_q0.05_peaks.narrowPeak";


# Each entry is "<mark name>:::<narrowPeak file>".
@MARKS=("CTCF:::$CTCF_NARROWPEAKS","p300:::$P300_NARROWPEAKS","DHS:::$DHS_NARROWPEAKS","LSD1:::$LSD1_NARROWPEAKS");

# For every mark write two BED files:
#   SUMMITS_<mark>.bed : the summit position (start column + column 10)
#   PEAKS_<mark>.bed   : a window of 250 bp either side of the summit
# Lines containing "hap", "random", "chrM" or "chrY" are skipped.
for my $x (0 .. $#MARKS)
{
	my ($mark,$narrowpeaks)=split(":::",$MARKS[$x]);
	print "$x\t$mark\t$narrowpeaks\n", `ls -l $narrowpeaks`,`wc -l $narrowpeaks`,"\n";

	open (my $narrowpeaks_fh, '<', $narrowpeaks) or die "Cannot read $narrowpeaks: $!\n";
	open (my $peaks_fh, '>', "PEAKS_$mark.bed") or die "Cannot write PEAKS_$mark.bed: $!\n";
	open (my $summits_fh, '>', "SUMMITS_$mark.bed") or die "Cannot write SUMMITS_$mark.bed: $!\n";

	while (my $line = <$narrowpeaks_fh>)
	{
		# Whole-line filter, equivalent to a chain of "grep -v" calls.
		next if $line =~ /hap|random|chrM|chrY/;
		chomp $line;
		my @mark_data=split("\t",$line);
		my $chr    = $mark_data[0];
		my $summit = $mark_data[1]+$mark_data[9];
		print $summits_fh "$chr\t$summit\t$summit\n";

		# A BED start may not be negative, so a summit closer than
		# 250 bp to the chromosome start gets a window starting at 0.
		my $peak_left = max(0,$summit-250);
		my $peak_right= $summit+250;
		print $peaks_fh "$chr\t$peak_left\t$peak_right\n";
	}
	close $narrowpeaks_fh;
	close $peaks_fh or die "Cannot finish writing PEAKS_$mark.bed: $!\n";
	close $summits_fh or die "Cannot finish writing SUMMITS_$mark.bed: $!\n";
}
# Split the CTCF and p300 windows into three sets:
#   union      : sorted, merged CTCF + p300 windows
#   CTCF only  : union minus p300 windows
#   p300 only  : union minus CTCF windows
#   both       : union minus the two "only" sets
system($_) for (
	"cat PEAKS_CTCF.bed PEAKS_p300.bed |sort -k 1,1 -k2,2n |$mergeBed -i stdin > PEAKS_CTCF_AND_p300.bed",
	"$subtractBed -a PEAKS_CTCF_AND_p300.bed -b PEAKS_p300.bed > PEAKS_CTCF_ONLY.bed",
	"$subtractBed -a PEAKS_CTCF_AND_p300.bed -b PEAKS_CTCF.bed > PEAKS_p300_ONLY.bed",
	"$subtractBed -a PEAKS_CTCF_AND_p300.bed -b PEAKS_CTCF_ONLY.bed PEAKS_p300_ONLY.bed > BOTH_PEAKS_p300_CTCF_ONLY.bed",
);
########################### LSD1 PEAK ANALYSIS ##############################
# For each genomic region class (promoter, gene body, intergenic) split the
# region into sub-regions by overlap with the CTCF, p300 and DHS windows,
# then count the LSD1 summits falling in each sub-region and relate that
# count to the sub-region's total length.
$PROMOTER_REGION="PROMOTERS_1000.REGIONS";
$GENEBODY_REGION="GENIC_1000.REGIONS";
$INTERGENIC_REGION="INTERGENIC_1000.REGIONS";
$LSD1_SUMMITS="SUMMITS_LSD1.bed";
print "Number of LSD1 Summits=",`wc -l $LSD1_SUMMITS`;

# Each entry is "<region label>:::<BED file>"; @COLORS gives the terminal
# colour used while reporting the region at the same index.
@REGIONS_TO_CHECK=("PROMOTER:::$PROMOTER_REGION","GENEBODY:::$GENEBODY_REGION","INTERGENIC:::$INTERGENIC_REGION");
@COLORS=qw(green cyan magenta);

open (NUMBERS_OF_SITES, '>', "NUMBERS_OF_SITES.txt") or die "Cannot write NUMBERS_OF_SITES.txt: $!\n";
for my $x (0 .. $#REGIONS_TO_CHECK)
{
	my ($region,$region_bed)=split(":::",$REGIONS_TO_CHECK[$x]);
	print color("$COLORS[$x]"), "\n\n$x $region\t$region_bed\n";

	# Sub-regions are carved out in order; each later step first removes
	# what the earlier steps already claimed:
	#   both CTCF+p300 -> CTCF -> p300 -> other DHS -> rest
	system ("$intersectBed -a $region_bed -b BOTH_PEAKS_p300_CTCF_ONLY.bed|sort -k 1,1 -k2,2n |$mergeBed -i stdin > BOTH_CTCF_p300_$region.bed");
	system ("$subtractBed  -a $region_bed -b BOTH_CTCF_p300_$region.bed |$intersectBed -a stdin -b PEAKS_CTCF.bed > CTCF_$region.bed");
	system ("$subtractBed  -a $region_bed -b BOTH_CTCF_p300_$region.bed |$intersectBed -a stdin -b PEAKS_p300.bed > p300_$region.bed");
	system ("$subtractBed  -a $region_bed -b BOTH_CTCF_p300_$region.bed CTCF_$region.bed p300_$region.bed |$intersectBed -a stdin -b PEAKS_DHS.bed > OTHER_DHS_$region.bed");
	system ("$subtractBed  -a $region_bed -b BOTH_CTCF_p300_$region.bed CTCF_$region.bed p300_$region.bed OTHER_DHS_$region.bed > REST_$region.bed");

	my @subregions=("$region_bed","CTCF_$region.bed","BOTH_CTCF_p300_$region.bed","p300_$region.bed","OTHER_DHS_$region.bed","REST_$region.bed");
	for my $subregion_bed (@subregions)
	{
		# Total length of the sub-region, converted from bp to kb.
		open (my $bed_fh, '<', $subregion_bed) or die "Cannot read $subregion_bed: $!\n";
		my $length_of_region=0;
		while (my $line = <$bed_fh>)
		{
			chomp $line;
			next unless $line =~ /\S/;
			my (undef,$left,$right)=split(/\t/, $line);
			$length_of_region+=$right-$left;
		}
		close $bed_fh;
		$length_of_region=$length_of_region/1000;

		# Each LSD1 summit overlapping the sub-region is written once
		# (-u), so the number of output lines is the number of sites.
		my $lsd1_hits="LSD1_at_$subregion_bed";
		system ("$intersectBed -wa -u -a $LSD1_SUMMITS -b $subregion_bed > $lsd1_hits");
		open (my $hits_fh, '<', $lsd1_hits) or die "Cannot read $lsd1_hits: $!\n";
		my $number_of_sites=0;
		while (my $hit = <$hits_fh>) {$number_of_sites++;}
		close $hits_fh;

		# Sites per kb; an empty sub-region has no defined density.
		my $enrichment=($length_of_region > 0) ? $number_of_sites/$length_of_region : "NA";

		# Note the column order: the screen shows sites before length,
		# the file shows length before sites.
		print "$subregion_bed\t$number_of_sites\t$length_of_region\t$enrichment\n";
		print NUMBERS_OF_SITES "$subregion_bed\t$length_of_region\t$number_of_sites\t$enrichment\n";
	}
}
# Do not leave the terminal in the last region's colour.
print color("reset");
close NUMBERS_OF_SITES or die "Cannot finish writing NUMBERS_OF_SITES.txt: $!\n";


