#!/usr/bin/perl
use Time::Local;
use Term::ANSIColor; 
# ---- External tool locations ------------------------------------------
# Directory holding the BEDTools binaries; the per-tool paths hang off it.
$BEDTOOLS     = '/home/saurabha/UTILITIES/BEDTOOLS/BEDTOOLS_2/bin';
$sortBed      = join('/', $BEDTOOLS, 'sortBed');
$intersectBed = join('/', $BEDTOOLS, 'intersectBed');

# ---- Inputs -----------------------------------------------------------
# Tab-separated read-count table parsed further down in this script.
$RAW_READ_COUNT_FILE = '/scratch/iwase-lab/SAURABH/FURTHER_ANALYSIS/GENE_EXPRESSION/WHOLE_GENES/FEATURECOUNTS/WHOLE_GENES_UNIQUE_GNT_Estimation_WT_KO';

# Gene names (kept as one space-separated string) that are left out of
# the DESeq input tables written later in the script.
$OUTLIER_GENES = join(' ', qw(
    Cdk8 Cmss1 Hexb Camk1d Lars2 Rpph1 Malat1
    Eef1a1 Hsp90aa1 Hspa8 Rmrp Actb Hsp90ab1 Gapdh
));

# Build PROMOTERS_FOR_LSD1_OVERLAP.bed: one six-column BED line per record
# of the read-count table.  Column 4 carries the gene id joined with the
# four count columns by "!!!" so the counts survive the bedtools intersect.
#
# The table is read directly (not through `cat | grep`) so that a missing
# or unreadable input aborts the run instead of yielding an empty BED file.
open(my $count_fh, '<', $RAW_READ_COUNT_FILE)
    or die "Cannot read $RAW_READ_COUNT_FILE: $!\n";
open(my $promoter_fh, '>', 'PROMOTERS_FOR_LSD1_OVERLAP.bed')
    or die "Cannot write PROMOTERS_FOR_LSD1_OVERLAP.bed: $!\n";

while (my $line = <$count_fh>) {
    # Drop the featureCounts banner line and the column-header line.
    next if $line =~ /Program:featureCounts/ || $line =~ /Geneid/;

    chomp $line;
    my @info = split("\t", $line);
    my ($gene_info, $chr, $left, $right, $strand) = @info[0 .. 4];

    # The gene id is "chr:::position:::strand:::name"; the first three
    # parts are cross-checked against the record's own columns below.
    my ($id_chr, $id_pos, $id_strand) = split(":::", $gene_info);
    my $gene_info_with_counts = join("!!!", $gene_info, @info[6 .. 9]);

    # The TSS is the left edge on '+' and the right edge on '-'.  The
    # window is 500 bp upstream plus 1000 bp downstream, mirrored for '-'.
    my ($promoter, $pl, $pr);
    if ($strand eq '-') {
        $promoter = $right;
        $pl       = $promoter - 1000;
        $pr       = $promoter + 500;
    }
    else {
        $promoter = $left;
        $pl       = $promoter - 500;
        $pr       = $promoter + 1000;
    }

    if (($id_chr ne $chr) || ($id_pos ne $promoter) || ($id_strand ne $strand)) {
        # Restore the terminal colour before dying; statements placed
        # after die would never run.
        print color("red"), "$line\n", color("reset");
        die "Values do not match\n";
    }

    # BED start coordinates must not be negative.
    $pl = 0 if $pl < 0;

    print {$promoter_fh} join("\t", $chr, $pl, $pr, $gene_info_with_counts, 0, $strand), "\n";
}

# Buffered write errors only surface at close, so check it.
close($promoter_fh) or die "Error closing PROMOTERS_FOR_LSD1_OVERLAP.bed: $!\n";
close($count_fh);

# Informational only: line counts of the input table and the BED files.
system("wc -l $RAW_READ_COUNT_FILE *.bed ");
sleep 1;
########################################
$LSD1_PEAKS = "/scratch/iwase-lab/SAURABH/INTERGENIC_ENHANCERS/MACS2_PEAKS/LSD1_Peaks/LSD1_mES_ChipSeq_2_Young_SRR122470_19_bw1_mm9_Input_Merged_All_q0.05_peaks.narrowPeak";

# Split the promoters by LSD1 peak overlap:
#   -u  keeps each promoter that overlaps at least one peak (reported once)
#   -v  keeps each promoter that overlaps no peak
# A failed bedtools run would leave an empty or partial BED file that the
# later steps would consume without complaint, so abort on non-zero status.
foreach my $job (
    [ '-u', 'LSD1_BOUND_PROMOTERS.bed' ],
    [ '-v', 'LSD1_UNBOUND_PROMOTERS.bed' ],
) {
    my ($overlap_flag, $out_bed) = @$job;
    my $cmd = "bedtools intersect -wa $overlap_flag -a PROMOTERS_FOR_LSD1_OVERLAP.bed -b $LSD1_PEAKS  > $out_bed";
    system($cmd) == 0
        or die "bedtools intersect failed (wait status $?): $cmd\n";
}

# Informational only: line counts of all BED files after the split.
system("wc -l *.bed");

########################################
# Write the DESeq input tables from the bound/unbound promoter BED files:
#   GROSEQ_READ_COUNTS_FOR_DESEQ_ALL.txt    rows from both BED files
#   GROSEQ_READ_COUNTS_FOR_DESEQ_BOUND.txt  rows from LSD1_BOUND only
# Each row is "<gene id>:::<status>" followed by the four count columns.
# Genes listed in $OUTLIER_GENES are left out of both tables.

# Each count column gets its own name (the fifth column is KO_GROSEQ_2).
my $deseq_header = join("\t", qw(GENE WT_GROSEQ_1 WT_GROSEQ_2 KO_GROSEQ_1 KO_GROSEQ_2)) . "\n";

open(my $all_fh, '>', 'GROSEQ_READ_COUNTS_FOR_DESEQ_ALL.txt')
    or die "Cannot write GROSEQ_READ_COUNTS_FOR_DESEQ_ALL.txt: $!\n";
open(my $bound_fh, '>', 'GROSEQ_READ_COUNTS_FOR_DESEQ_BOUND.txt')
    or die "Cannot write GROSEQ_READ_COUNTS_FOR_DESEQ_BOUND.txt: $!\n";

print {$all_fh}   $deseq_header;
print {$bound_fh} $deseq_header;

# Exact-name lookup.  A substring search over the space-separated list
# would also discard any gene whose name merely occurs inside an outlier
# name (e.g. "Hsp90" inside "Hsp90aa1") and any record with an empty name.
my %is_outlier = map { $_ => 1 } split(' ', $OUTLIER_GENES);

for my $status (qw(LSD1_BOUND LSD1_UNBOUND)) {
    my $bed_file = $status . "_PROMOTERS.bed";

    # A missing BED file must not be mistaken for "no promoters".
    open(my $bed_fh, '<', $bed_file)
        or die "Cannot read $bed_file: $!\n";

    while (my $line = <$bed_fh>) {
        chomp $line;

        # BED column 4 is "<gene id>!!!<WT1>!!!<WT2>!!!<KO1>!!!<KO2>".
        my $gene_info_with_counts = (split("\t", $line))[3];
        my ($gene_info, $wt_1, $wt_2, $ko_1, $ko_2) = split("!!!", $gene_info_with_counts);

        # The gene id is "chr:::position:::strand:::name".
        my $gene_name = (split(":::", $gene_info))[3];
        next if defined($gene_name) && $is_outlier{$gene_name};

        my $row = join("\t", $gene_info . ":::" . $status, $wt_1, $wt_2, $ko_1, $ko_2) . "\n";
        print {$all_fh} $row;
        print {$bound_fh} $row if $status eq 'LSD1_BOUND';
    }
    close($bed_fh);
}

# Buffered write errors only surface at close, so check both.
close($all_fh)   or die "Error closing GROSEQ_READ_COUNTS_FOR_DESEQ_ALL.txt: $!\n";
close($bound_fh) or die "Error closing GROSEQ_READ_COUNTS_FOR_DESEQ_BOUND.txt: $!\n";
########################################
# Show the first lines of every .txt file in the working directory.
system("head *.txt");

# Run the DESeq R script once per table.  The first command ends in "&",
# so the shell puts it in the background and system() returns right away;
# the second command runs in the foreground.
# NOTE(review): the backgrounded job may still be running when "Done" is
# printed below - confirm that this is intended.
my @deseq_commands = (
    "Rscript DESEQ_GROSEQ.R GROSEQ_READ_COUNTS_FOR_DESEQ_BOUND.txt DESEQ_GROSEQ_READ_COUNTS_BOUND.txt &",
    "Rscript DESEQ_GROSEQ.R GROSEQ_READ_COUNTS_FOR_DESEQ_ALL.txt   DESEQ_GROSEQ_READ_COUNTS_ALL.txt",
);
system($_) for @deseq_commands;


print "Done !!!\n" x 5;




