#!/usr/bin/perl
use Time::Local;
use Term::ANSIColor; 

##########################################
# Per-mark metadata, one "NAME:::TOTAL:::LENGTH" record per entry.
#   TOTAL  is stored in %TOTAL_COUNT and later used as the per-million divisor
#          (presumably the total read count of the library -- confirm).
#   LENGTH is stored in %FEATURE_LENGTH and later used as the per-kilobase
#          divisor (presumably the promoter window size in bp -- confirm).
@COUNT_INFO=qw(
INPUT:::207912051:::2001
H3K4me1:::41202217:::2001
H3K4me2:::83279604:::1251
H3K4me3:::44855008:::1251
H3K27Ac:::87718221:::1251
H3K27me3:::147445234:::2001
H3K9Ac:::42691637:::1251
H3K9me3:::96607773:::2001
p300:::29971501:::1001
LSD1:::22846609:::1001
SMCX:::50182748:::1001
);

# Letter assigned to each record above, by position; it becomes the first
# character of the label prefix stored in %PREFIX (letter followed by "0").
@ALPHABET=qw(N B C D E F G H I J K L);

# Populate the lookup tables used by the rest of the script and echo each
# record to STDOUT in red.
for my $index (0 .. $#COUNT_INFO)
{
	my ($mark, $count, $length) = split(/:::/, $COUNT_INFO[$index]);
	$TOTAL_COUNT{$mark}    = $count;
	$FEATURE_LENGTH{$mark} = $length;
	$PREFIX{$mark}         = $ALPHABET[$index]."0";
	# Reset the colour on every line; otherwise the terminal stays red for
	# everything printed afterwards (including after the script exits).
	print color("red"), "$mark\t$TOTAL_COUNT{$mark}\t$FEATURE_LENGTH{$mark}\t$PREFIX{$mark}", color("reset"), "\n";
}

##########################################
# Input: the promoter count table, streamed through a shell pipeline that drops
# every line containing "INPUT_WT_mES.bam" (presumably the header row naming the
# BAM files -- confirm). For a piped open, a true return only means the child
# process was started; a failing cat/grep is reported when the handle is closed.
open (PROMOTER_COUNTS, "-|", "cat /scratch/iwase-lab/SAURABH/FURTHER_ANALYSIS/GENE_EXPRESSION/WHOLE_GENES/LSD1_PROMOTER_COUNTS/RAW_COUNTS/WT_mES_ALL_MARKS_For_Promoter_Estimation|grep -v INPUT_WT_mES.bam")
	or die "Cannot start promoter-count input pipeline: $!\n";

# Output tables consumed by the R plotting scripts at the end of the run.
# Abort immediately if any of them cannot be created, instead of silently
# writing nothing and plotting from missing/stale files.
open (DATA_FOR_BOXPLOT, ">", "DATA_FOR_BOXPLOT_PROMOTERS.txt")
	or die "Cannot write DATA_FOR_BOXPLOT_PROMOTERS.txt: $!\n";
open (LSD1_VS_FPKM, ">", "LSD1_LEVELS_AT_PROMOTERS.txt")
	or die "Cannot write LSD1_LEVELS_AT_PROMOTERS.txt: $!\n";
open (H3K4me2_VS_FPKM, ">", "H3K4me2_LEVELS_AT_PROMOTERS.txt")
	or die "Cannot write H3K4me2_LEVELS_AT_PROMOTERS.txt: $!\n";

# Marks that get an input-normalized value per promoter (everything in the
# count table except INPUT itself).
@MARKS  = qw(H3K4me1 H3K4me2 H3K4me3 H3K27Ac H3K27me3 H3K9Ac H3K9me3 p300 LSD1 SMCX);

# Column headers of the two scatter-plot tables.
# NOTE(review): the H3K4me2 table's first column actually holds H3K4me2 levels
# but is labelled LSD1_LEVELS. Both tables are fed to the same SCATTER_PLOT.R,
# which may look the columns up by these names, so the header is deliberately
# left unchanged -- confirm against the R script before renaming.
print LSD1_VS_FPKM     "GROSEQ\tLSD1_LEVELS\n";
print H3K4me2_VS_FPKM  "LSD1_LEVELS\tGROSEQ\n";

# Box-plot table: header followed by twenty rows (A1, A2 ... J1, J2) that carry
# a feature label but an empty value. Presumably placeholder categories for the
# plot layout -- confirm against boxplot_quintiles.R.
print DATA_FOR_BOXPLOT "VALUE\tFEATURE\n";
print DATA_FOR_BOXPLOT "\tA1\n\tA2\n\tB1\n\tB2\n\tC1\n\tC2\n\tD1\n\tD2\n\tE1\n\tE2\n\tF1\n\tF2\n\tG1\n\tG2\n\tH1\n\tH2\n\tI1\n\tI2\n\tJ1\n\tJ2\n";


# Main pass over the promoter count table.
#
# For every promoter line this
#   * assigns the promoter to one of five expression classes from its wild-type
#     GRO-seq value,
#   * computes, for each mark in @MARKS, log2 of the mark's length- and
#     depth-scaled count divided by the equally scaled INPUT count,
#   * appends one row per mark (plus one GRO-seq row) to DATA_FOR_BOXPLOT and one
#     row each to LSD1_VS_FPKM and H3K4me2_VS_FPKM.
# After the loop the number of promoters per class is printed to STDOUT.
#
# Reads %TOTAL_COUNT, %FEATURE_LENGTH, %PREFIX and @MARKS, which are set up
# earlier in this file.

# Order of the count columns that follow the six leading columns
# (promoter info, Chr, Start, End and two further fields) of each input line.
my @COUNT_COLUMNS = qw(INPUT H3K4me1 H3K4me2 H3K4me3 H3K27Ac H3K27me3 H3K9Ac H3K9me3 p300 LSD1 SMCX);

# Promoters seen per expression class; pre-seeded so that an empty class is
# reported as 0 rather than as an empty string.
my %class_total = map { $_ => 0 } qw(A_NON B_VER C_LOW D_MED E_HIG);

# Map a GRO-seq value onto its expression class label. Upper bounds are
# inclusive: <=0.002 A_NON, <=0.1 B_VER, <=1 C_LOW, <=2.5 D_MED, else E_HIG.
sub _expression_class
{
	my ($groseq) = @_;
	return 'A_NON' if $groseq <= 0.002;
	return 'B_VER' if $groseq <= 0.100;
	return 'C_LOW' if $groseq <= 1.000;
	return 'D_MED' if $groseq <= 2.500;
	return 'E_HIG';
}

# Scale a raw count to "per million total counts, per 1000 units of length".
# A pseudocount of 0.01 keeps the result strictly positive so it can be logged.
sub _per_kb_per_million
{
	my ($count, $total, $length) = @_;
	return ($count+0.01)*(1000000/$total)*(1000/$length);
}

# Limit $value to the closed interval [$min, $max].
sub _clamp
{
	my ($value, $min, $max) = @_;
	return $max if $value > $max;
	return $min if $value < $min;
	return $value;
}

while (my $line = <PROMOTER_COUNTS>)
{
	chomp $line;
	next unless length $line;   # a blank line is not a promoter record

	# Only the first column and the count columns are used; the five columns
	# in between are skipped.
	my ($promoter_info, undef, undef, undef, undef, undef, @counts) = split(/\t/, $line);

	# Counts keyed by mark name (replaces the former symbolic $$MARK lookup).
	my %count_for;
	@count_for{@COUNT_COLUMNS} = @counts;

	# The first column is itself a ":::"-separated record; its ninth field is
	# the wild-type GRO-seq value.
	my $groseq_wt = (split(/:::/, $promoter_info))[8];

	my $type = _expression_class($groseq_wt);
	$class_total{$type}++;

	my $input_density = _per_kb_per_million($count_for{'INPUT'}, $TOTAL_COUNT{'INPUT'}, $FEATURE_LENGTH{'INPUT'});

	my %log2_enrichment;
	for my $mark (@MARKS)
	{
		my $enrichment = _per_kb_per_million($count_for{$mark}, $TOTAL_COUNT{$mark}, $FEATURE_LENGTH{$mark})*(1/$input_density);
		$log2_enrichment{$mark} = log($enrichment)/log(2);
		print DATA_FOR_BOXPLOT "$log2_enrichment{$mark}\t$PREFIX{$mark}_${mark}_$type\n";
	}

	# log10 expression; 0.0001 is added so that a value of 0 can be logged.
	my $groseq_log10  = log($groseq_wt + 0.0001)/log(10);

	# The scatter-plot tables use an upper-capped expression value and, for
	# LSD1 only, a signal limited to [-4, 4.2]; the box-plot table below gets
	# the uncapped expression value.
	my $groseq_capped = $groseq_log10 > 3 ? 3 : $groseq_log10;
	my $lsd1_signal   = _clamp($log2_enrichment{'LSD1'}, -4, 4.2);
	print LSD1_VS_FPKM    "$groseq_capped\t$lsd1_signal\n";
	print H3K4me2_VS_FPKM "$log2_enrichment{'H3K4me2'}\t$groseq_capped\n";

	# NOTE(review): unlike the mark labels there is no "_" between "GROSEQ"
	# and the class (e.g. "A0_GROSEQA_NON"). Kept as is because the R script
	# may match on this exact label -- confirm before changing.
	print DATA_FOR_BOXPLOT "$groseq_log10\tA0_GROSEQ$type\n";
}
print "NONE= $class_total{'A_NON'}\nVERY_LOW= $class_total{'B_VER'}\nLOW= $class_total{'C_LOW'}\nMED= $class_total{'D_MED'}\nHIGH= $class_total{'E_HIG'}\n";

# Close all handles before the R scripts read the tables. Buffered write errors
# (e.g. a full disk) only surface at close, and closing the input pipe is where
# a non-zero exit status of the cat|grep pipeline becomes visible, so every
# close is checked and reported.
close(DATA_FOR_BOXPLOT)
	or warn "Error closing DATA_FOR_BOXPLOT_PROMOTERS.txt (table may be incomplete): $!\n";
close(PROMOTER_COUNTS)
	or warn "Promoter-count input pipeline did not finish cleanly (wait status $?): $!\n";
close(LSD1_VS_FPKM)
	or warn "Error closing LSD1_LEVELS_AT_PROMOTERS.txt (table may be incomplete): $!\n";
close(H3K4me2_VS_FPKM)
	or warn "Error closing H3K4me2_LEVELS_AT_PROMOTERS.txt (table may be incomplete): $!\n";

# Plotting jobs: R script, input table, output image.
my @PLOT_JOBS = (
	[ 'boxplot_quintiles.R', 'DATA_FOR_BOXPLOT_PROMOTERS.txt',  'BOXPLOT_PROMOTERS_QUINTILES.png' ],
	[ 'SCATTER_PLOT.R',      'LSD1_LEVELS_AT_PROMOTERS.txt',    'LSD1_GENE_EXPRESSION.png'        ],
	[ 'SCATTER_PLOT.R',      'H3K4me2_LEVELS_AT_PROMOTERS.txt', 'H3K4me2_GENE_EXPRESSION.png'     ],
);

# Run every job even if an earlier one fails, but report each failure instead
# of ignoring it. List-form system() runs Rscript directly without a shell.
for my $job (@PLOT_JOBS)
{
	my @command = ('Rscript', @$job);
	next if system(@command) == 0;

	if ($? == -1)
	{
		warn "Could not run '@command': $!\n";
	}
	else
	{
		warn "'@command' failed (wait status $?)\n";
	}
}


