#!/usr/bin/perl
use Time::Local;
use Term::ANSIColor;
use List::Util qw[min max];

# Absolute locations of the bedtools install and the refGene (mm9) table
# that the rest of this script reads.
$BEDTOOLS    = '/home/saurabha/UTILITIES/BEDTOOLS/BEDTOOLS_2/bin';
$REFGENE_MM9 = '/home/saurabha/UTILITIES/RNASEQc/refGene_mm9.txt';

# The two bedtools executables used below, resolved inside $BEDTOOLS.
($sortBed, $intersectBed) = map { "$BEDTOOLS/$_" } qw(sortBed intersectBed);

# WARNING: destructive. Deletes every *.bed, *.narrowPeak and *.SAF file in
# the current working directory, then lists what is left and pauses 3 s.
system('rm *.bed *.narrowPeak *.SAF; ls -l; sleep 3');

# Build a coordinate-sorted BED file of padded gene intervals.
#
# Reads the tab-separated refGene table ($REFGENE_MM9), pads each record's
# [column 5, column 6] interval according to its strand (column 4) and pipes
# the resulting 6-column BED lines through sortBed into $GENIC_REGIONS.
#
# Padding applied:
#   '+' strand : 1500 subtracted from the left end, 3000 added to the right
#   '-' strand : 3000 subtracted from the left end, 1500 added to the right
#   otherwise  : 1250 on both sides
#
# NOTE(review): the output file name says "1250Up_to_3000Down" but stranded
# genes are padded by 1500 on the upstream side. The numbers are kept exactly
# as they were; confirm which value is intended.
$GENIC_REGIONS="KNOWN_GENES_1250Up_to_3000Down_REFGENE_mm9_BED.bed";
open (my $refgene_in, '<', $REFGENE_MM9)
	or die "Cannot read $REFGENE_MM9: $!\n";
# The shell is needed here for the output redirection.
open (my $bed_pipe, '|-', "$sortBed -i - > $GENIC_REGIONS")
	or die "Cannot start $sortBed: $!\n";
NEXT_GENE:while (my $line = <$refgene_in>)
{
	chomp $line;
	# Skip blank lines and '#'-prefixed header/comment lines.
	next NEXT_GENE if $line =~ /^\s*(?:#|$)/;

	my @fields = split(/\t/, $line);
	my ($chromosome, $gene_strand, $gene_left, $gene_right, $gene_name)
		= @fields[2, 3, 4, 5, 12];

	# A record without numeric coordinates cannot become a valid BED line.
	next NEXT_GENE unless defined $gene_right
		&& $gene_left  =~ /^\d+$/
		&& $gene_right =~ /^\d+$/;

#	if (substr($gene_name,0,3) eq "Mir") {print "$gene_name\n";next NEXT_GENE;}
#	if ((substr($gene_name,0,5) eq "Snora") || (substr($gene_name,0,5) eq "Snord")) {print "$gene_name\n";next NEXT_GENE;}

	my ($pad_left, $pad_right) =
		  $gene_strand eq '+' ? (1500, 3000)
		: $gene_strand eq '-' ? (3000, 1500)
		:                       (1250, 1250);

	# A negative start is not a valid BED coordinate; genes close to the
	# start of a chromosome are therefore clamped to 0.
	my $padded_left  = max(0, $gene_left - $pad_left);
	my $padded_right = $gene_right + $pad_right;

	print $bed_pipe "$chromosome\t$padded_left\t$padded_right\t$gene_name\tNA\t$gene_strand\n";
}
close ($refgene_in);
# Closing the pipe waits for sortBed; report a non-zero exit instead of
# silently continuing with a possibly truncated file.
close ($bed_pipe)
	or warn "sortBed did not finish cleanly while writing $GENIC_REGIONS: "
		. ($! ? $! : "exit status " . ($? >> 8)) . "\n";
system ("wc -l $REFGENE_MM9");
system ("wc -l $GENIC_REGIONS");
sleep 3;
###############################################
# Write RN45s.bed: a single three-column BED record, chr17:39979942-39985774.
# (Presumably the Rn45s rRNA locus, going by the file name - confirm.)
open (my $rrna_out, '>', 'RN45s.bed');
print {$rrna_out} join("\t", qw(chr17 39979942 39985774)), "\n";
close ($rrna_out);
###############################################
# Reduce every interval in Malik_Enhancers.txt to a 1-bp "summit" at its
# midpoint.
#
# Input : Malik_Enhancers.txt, streamed through sortBed; the first four
#         tab-separated columns are read as chromosome, left, right, ID.
# Output: PREDICTED_ENHANCERS_CN_SUMMITS.bed with lines
#         chromosome, midpoint-1, midpoint, ID.
#
# List-form pipe open: sortBed is run directly, without a shell.
open (my $enhancer_in, '-|', $sortBed, '-i', 'Malik_Enhancers.txt')
	or die "Cannot start $sortBed on Malik_Enhancers.txt: $!\n";
open (my $summit_out, '>', 'PREDICTED_ENHANCERS_CN_SUMMITS.bed')
	or die "Cannot write PREDICTED_ENHANCERS_CN_SUMMITS.bed: $!\n";

while (my $line = <$enhancer_in>)
{
	chomp $line;
	my ($chr, $left, $right, $id) = split(/\t/, $line);

	# Blank, header or otherwise malformed lines have no numeric
	# coordinates; emitting them would give a bogus "-1 0" record.
	next unless defined $right && $left =~ /^\d+$/ && $right =~ /^\d+$/;

	# Integer midpoint, kept >= 1 so the start below is never negative.
	my $center      = max(1, int(($left + $right) / 2));
	my $left_center = $center - 1;
	print $summit_out "$chr\t$left_center\t$center\t$id\n";
}
# Closing the read pipe reaps sortBed and exposes its exit status.
close ($enhancer_in)
	or warn "sortBed did not finish cleanly on Malik_Enhancers.txt: "
		. ($! ? $! : "exit status " . ($? >> 8)) . "\n";
close ($summit_out)
	or die "Error writing PREDICTED_ENHANCERS_CN_SUMMITS.bed: $!\n";

###############################################
# Keep only the enhancer summits that overlap none of the exclusion sets
# (blacklisted regions, padded genic regions, the RN45s interval) and write
# them to INTERGENIC_ENHANCERS_CN_SUMMITS.bed.
# intersectBed is called with -v (report -a entries without any overlap in -b)
# and -wa (write the original -a entry); see the bedtools intersect docs.
$BLACKLISTED_REGIONS="/scratch/iwase-lab/SAURABH/INTERGENIC_ENHANCERS/BLACKLISTED/BLACKLISTED_REGIONS.bed";
my $intersect_command = "$intersectBed -wa -v -a PREDICTED_ENHANCERS_CN_SUMMITS.bed -b $BLACKLISTED_REGIONS $GENIC_REGIONS RN45s.bed > INTERGENIC_ENHANCERS_CN_SUMMITS.bed";
# Every later step reads the file produced here, so a failed run must not be
# silently ignored.
if (system($intersect_command) != 0)
{
	die "intersectBed failed: "
		. ($? == -1 ? "could not be started: $!" : "exit status " . ($? >> 8))
		. "\n";
}
################# BED TO SAF ##################
# Convert the filtered summits into three SAF annotation files, one per
# window half-width (500, 750, 1000): each summit becomes the interval
# [summit - half_width, summit + half_width].
#
# Input : INTERGENIC_ENHANCERS_CN_SUMMITS.bed (chromosome, start, summit, ID)
# Output: INTERGENIC_ENHANCERS_CN_<half_width>.SAF, each starting with $HEADER
#
# NOTE(review): the Strand column is written as "." exactly as before; the
# featureCounts documentation describes '+'/'-' for this column - confirm
# that "." is acceptable for the unstranded counting done later.
$HEADER ="GeneID\tChr\tStart\tEnd\tStrand\n";
my @half_widths = (500, 750, 1000);

open (my $summits_in, '<', 'INTERGENIC_ENHANCERS_CN_SUMMITS.bed')
	or die "Cannot read INTERGENIC_ENHANCERS_CN_SUMMITS.bed: $!\n";

# One output handle per half-width, keyed by the half-width itself.
my %saf_for;
for my $half_width (@half_widths)
{
	my $saf_name = "INTERGENIC_ENHANCERS_CN_${half_width}.SAF";
	open ($saf_for{$half_width}, '>', $saf_name)
		or die "Cannot write $saf_name: $!\n";
	print { $saf_for{$half_width} } $HEADER;
}

while (my $line = <$summits_in>)
{
	chomp $line;
	my ($chr, $summit_start, $center, $id) = split(/\t/, $line);

	# Ignore blank or malformed lines that carry no numeric summit.
	next unless defined $center && $center =~ /^\d+$/;

	for my $half_width (@half_widths)
	{
		# SAF positions are 1-based (per the Subread guide), so a window
		# around a summit near the chromosome start is clamped to 1
		# rather than written as zero or a negative number.
		my $window_start = max(1, $center - $half_width);
		my $window_end   = $center + $half_width;
		print { $saf_for{$half_width} } "$id\t$chr\t$window_start\t$window_end\t.\n";
	}
}
close ($summits_in);
for my $half_width (@half_widths)
{
	# Buffered write errors only surface when the handle is closed.
	close ($saf_for{$half_width})
		or die "Error writing INTERGENIC_ENHANCERS_CN_${half_width}.SAF: $!\n";
}

# Line counts of everything in the working directory, as a quick sanity check.
system ("wc -l *");
######################################
# Directory that contains the BAM files named in @SAMPLE_INFO.
$BRUSEQ_FOLDER="/scratch/iwase-lab/SAURABH/BRUSEQ_ANALYSIS/BRUSEQ_LSD1_CN_RUN_1/TOPHAT/TOPHAT_FINISHED_AND_MERGED_BAM_FILES_RUN_1";

# One entry per sample, written as "<short label>:::<BAM file name>".
@SAMPLE_INFO=qw(
LSD1_A2:::Merged_Pool2-DLAR_CN_shRNA_A2_S6_L002_R1_001_mm9_TOPHAT_bw2.bam
LSD1_B2:::Merged_Pool2-DLAR_CN_shRNA_B2_S8_L002_R1_001_mm9_TOPHAT_bw2.bam
CNWT_C2:::Merged_Pool2-DLAR_CN_shRNA_Control_2_S7_L002_R1_001_mm9_TOPHAT_bw2.bam
CNWT_U2:::Merged_Pool2-DLAR_CN_Untreated_2_S5_L002_R1_001_mm9_TOPHAT_bw2.bam
LSD1_A3:::Merged_Pool3-DLAR_CN_shRNA_A3_S2_L001_R1_001_mm9_TOPHAT_bw2.bam
LSD1_B3:::Merged_Pool3-DLAR_CN_shRNA_B3_S4_L001_R1_001_mm9_TOPHAT_bw2.bam
CNWT_C3:::Merged_Pool3-DLAR_CN_shRNA_Control_3_S3_L001_R1_001_mm9_TOPHAT_bw2.bam
CNWT_U3:::Merged_Pool3-DLAR_CN_Untreated_3_S1_L001_R1_001_mm9_TOPHAT_bw2.bam);

# For every sample: list the source BAM, symlink it into the working
# directory as "<label>.bam", and append that link name to $FILELIST
# (space-separated, with a leading space).
foreach my $sample_entry (@SAMPLE_INFO)
{
	my ($label, $bam_name) = split(/:::/, $sample_entry);
	my $bam_path = "$BRUSEQ_FOLDER/$bam_name";

	system ("ls -lh $bam_path");
	sleep 1;
	system ("ln -s $bam_path $label.bam");

	$FILELIST .= " $label.bam";
}
print "$FILELIST\n";

######################################
# Count reads over the 1000-bp half-width enhancer windows with featureCounts.
#
# Reads : INTERGENIC_ENHANCERS_CN_1000.SAF and the BAM links in $FILELIST
# Writes: ALL_CN_INTERGENIC_ENHANCERS (the -o argument)
#
# Options passed (see the featureCounts documentation for exact semantics):
#   -T 15    number of threads
#   -F SAF   annotation given with -a is in SAF format
#   -O       also count reads that overlap more than one feature
#   -g       attribute name; NOTE(review): appears to apply to GTF
#            annotations only - confirm it is a no-op with -F SAF.
$FEATURECOUNTS="/home/saurabha/UTILITIES/SUBREAD/subread-1.5.0-p1-source/bin/featureCounts";

$OPTIONS="-T 15 -a INTERGENIC_ENHANCERS_CN_1000.SAF -F SAF -O -g GeneID ";

# Without any BAM file featureCounts has nothing to count.
die "No BAM files to count: \$FILELIST is empty\n"
	unless defined $FILELIST && $FILELIST =~ /\S/;

$COMMAND="$FEATURECOUNTS $OPTIONS $FILELIST -o ALL_CN_INTERGENIC_ENHANCERS";
print "$COMMAND\n";

# Report success only when featureCounts actually succeeded.
if (system ($COMMAND) != 0)
{
	die "featureCounts failed: "
		. ($? == -1 ? "could not be started: $!" : "exit status " . ($? >> 8))
		. "\n";
}
sleep 3;
print "Done !!!\n" x 5;



