#Script to run diffReps to identify differentially occupied peaks using sliding windows
#Refer https://github.com/shenlab-sinai/diffreps for details


##TASK1: Convert all the BAM files to BED files using bedtools

#Source of BAM files = /data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Cutadapt_Bowtie/Bowtie_k1/bamfiles/PCR_dup_removed
#These files are created with k1 option and the PCR DUPLICATES ARE REMOVED!!
bam_source_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Cutadapt_Bowtie/Bowtie_k1/bamfiles/PCR_dup_removed
alignment_bed_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Alignments_BEDs

# Print the BED output path for one BAM input path.
# Arguments: $1 - path to a BAM file
# Outputs:   "<alignment_bed_dir>/<file name>" where the first occurrence of
#            "_k1_rmdup.bam" in the file name is replaced by "_k1_rmdup.bed".
#            A name without that suffix is passed through unchanged.
bam_to_bed_path() {
    local bam_name=${1##*/}
    printf '%s/%s\n' "$alignment_bed_dir" "${bam_name/_k1_rmdup.bam/_k1_rmdup.bed}"
}

# -p also creates the parent Diffreps directory and stays quiet when the
# script is re-run and the directories already exist.
mkdir -p "$alignment_bed_dir"

# Read the sorted BAM list into an array so paths are never word-split.
mapfile -t bam_inputs < <(find "$bam_source_dir/" -maxdepth 1 -type f -name "*.bam" | sort)

for bam in "${bam_inputs[@]}"; do
    bed=$(bam_to_bed_path "$bam")
    echo "Processing bedconversion $bam"
    # Only report completion when bedtools actually succeeded.
    if bedtools bamtobed -i "$bam" > "$bed"; then
        echo "Completed bedconversion $bam"
    else
        echo "FAILED bedconversion $bam" >&2
    fi
done
	
##TASK2: Running diffReps and writing the output to a file.
#Use the corresponding genotype's H3 levels for the normalisation of the output enrichment
diffreps_root=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps
diffreps_bed_dir=$diffreps_root/Alignments_BEDs
diffreps_out_dir=$diffreps_root/Diffreps_output
diffreps_chrlen=$diffreps_root/IRGSP_1_chromsizes.txt

# Run one diffReps comparison (treatment = nrpd1-kd, control = WT) using the
# H3 Rep1/Rep2 BED files of each genotype as --btr / --bco backgrounds.
# BED file names are relative, so the caller must already be inside
# $diffreps_bed_dir.
# Arguments:
#   $1 - value for --nsd ("b" or "s" in this script; see the diffReps docs)
#   $2 - report file name, written under $diffreps_out_dir
#   $3, $4 - the two treatment BED files (-tr)
#   $5, $6 - the two control BED files (-co)
# Returns: the exit status of diffReps.pl
run_diffreps() {
    local nsd=$1 report=$2
    local tr_a=$3 tr_b=$4 co_a=$5 co_b=$6
    diffReps.pl -tr "$tr_a" "$tr_b" -co "$co_a" "$co_b" \
        --btr H3_nrpd1-kd_Rep1_k1_rmdup.bed H3_nrpd1-kd_Rep2_k1_rmdup.bed \
        --bco H3_WT_Rep1_k1_rmdup.bed H3_WT_Rep2_k1_rmdup.bed \
        --mode p --nsd "$nsd" \
        -re "$diffreps_out_dir/$report" \
        -meth nb --pval 0.0001 --noanno \
        --chrlen "$diffreps_chrlen"
}

# -p keeps re-runs from failing when the output directory already exists.
mkdir -p "$diffreps_out_dir"

# Every BED argument below is relative, so only run the comparisons when the
# directory change succeeded; otherwise diffReps would look in the wrong place.
if cd "$diffreps_bed_dir"; then
    ##Comparing H3K27me3 marks
    run_diffreps b H3K27me3_diffreps_Out.txt \
        H3K27me3_nrpd1-kd_Rep1_k1_rmdup.bed H3K27me3_nrpd1-kd_Rep2_k1_rmdup.bed \
        H3K27me3_WT_Rep1_k1_rmdup.bed H3K27me3_WT_Rep2_k1_rmdup.bed

    ##Comparing H3K9me2 marks ::::Comparing WT reps 1 and 2 with nrpd1 reps 1 and 2
    run_diffreps b H3K9me2_diffreps_Out_compWT12_12.txt \
        H3K9me2_nrpd1-kd_Rep1_k1_rmdup.bed H3K9me2_nrpd1-kd_Rep2_k1_rmdup.bed \
        H3K9me2_WT_Rep1_k1_rmdup.bed H3K9me2_WT_Rep2_k1_rmdup.bed

    ##Comparing H3K9me2 marks ::::Comparing WT reps 1 and 2 with nrpd1 reps 3 and 4
    run_diffreps b H3K9me2_diffreps_Out_compWT12_34.txt \
        H3K9me2_nrpd1-kd_Rep3_k1_rmdup.bed H3K9me2_nrpd1-kd_Rep4_k1_rmdup.bed \
        H3K9me2_WT_Rep1_k1_rmdup.bed H3K9me2_WT_Rep2_k1_rmdup.bed

    ##Comparing H3K9me2 marks ::::Comparing WT reps 3 and 4 with nrpd1 reps 3 and 4
    run_diffreps b H3K9me2_diffreps_Out_compWT34_34.txt \
        H3K9me2_nrpd1-kd_Rep3_k1_rmdup.bed H3K9me2_nrpd1-kd_Rep4_k1_rmdup.bed \
        H3K9me2_WT_Rep3_k1_rmdup.bed H3K9me2_WT_Rep4_k1_rmdup.bed

    ##Comparing PolII marks ::::Normalising to H3 IPs
    # The first PolII replicate of each genotype has no "_Rep1" in its file name.
    run_diffreps b PolII_diffreps_Out_H3_normalised.txt \
        PolII_nrpd1-kd_k1_rmdup.bed PolII_nrpd1-kd_Rep2_k1_rmdup.bed \
        PolII_WT_k1_rmdup.bed PolII_WT_Rep2_k1_rmdup.bed

    ##Comparing H3K4me3 marks
    # This is the only comparison run with --nsd s (the others use --nsd b).
    run_diffreps s H3K4me3_diffreps_Out_narrow.txt \
        H3K4me3_nrpd1-kd_Rep1_k1_rmdup.bed H3K4me3_nrpd1-kd_Rep2_k1_rmdup.bed \
        H3K4me3_WT_Rep1_k1_rmdup.bed H3K4me3_WT_Rep2_k1_rmdup.bed
else
    echo "ERROR: cannot cd to $diffreps_bed_dir; skipping diffReps comparisons" >&2
fi

#TASK3: Make BED files of the hotspots of differential enrichment, higher occupancy and lower occupancy for each histone mark and for polymerase II occupancy sites.
#REMOVE the regions not from chrom 1-12.
#Get the hotspots file and manually curate it to get a BED file.
#Open the differential occupancy file and split it, based on up/down regulated occupancy, into two separate BED files.
#Manually done.
#The resultant bedfiles are in sftp://svp@srna/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Diffreps_output/Bedfiles
##ADD A UNIQUE ID TO EACH BED row entry

# Append a suffix to column 4 of every row of a BED file, in place.
# Arguments: $1 - BED file to rewrite
#            $2 - text appended to field 4
# Notes:
#   - Assigning to $4 makes awk rebuild each record with its default output
#     separator, so the rewritten file is single-space separated.
#   - Not idempotent: every call appends the suffix again.
#   - The temp file sits next to the target and is removed if awk fails, so
#     a failed run never leaves a stray file or clobbers the original.
# Returns: 0 on success, non-zero if awk or mv failed.
tag_bed_names() {
    local bed=$1 suffix=$2
    local tmp=${bed}.tmp
    if awk -v suffix="$suffix" '{$4=$4 suffix}1' "$bed" > "$tmp"; then
        mv -f "$tmp" "$bed"
    else
        rm -f "$tmp"
        return 1
    fi
}

# File names below are relative, so only tag them once we are in the right place.
if cd /data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Diffreps_output/Bedfiles; then
    # <mark>_<kind>_diffreps.bed gets the suffix _<mark>_<kind>
    for mark in H3K4me3 H3K27me3 PolII; do
        for kind in downriched upriched HOTSPOTS; do
            tag_bed_names "${mark}_${kind}_diffreps.bed" "_${mark}_${kind}"
        done
    done
    # H3K9me2 has one file set per replicate comparison:
    # H3K9me2_<kind>_<comparison>_diffreps.bed gets _H3K9me2_<kind>_diffreps_<comparison>
    for comparison in WT12_12 WT12_34 WT34_34; do
        for kind in downriched upriched HOTSPOTS; do
            tag_bed_names "H3K9me2_${kind}_${comparison}_diffreps.bed" "_H3K9me2_${kind}_diffreps_${comparison}"
        done
    done
else
    echo "ERROR: cannot cd to the Diffreps_output/Bedfiles directory; skipping ID tagging" >&2
fi
#TASK4: Counting the enrichment at the identified loci relative to H3 (difference)
#######USE bigWigAverageOverBed from the UCSC utilities
#Refer https://www.biostars.org/p/320196/
occupancy_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Occupancy_regions
region_bed_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Diffreps_output/Bedfiles
bigwig_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Deeptools_k1/Bamcompare/H3IP_normalised

# Print the output path for one bigWig inside a staging directory.
# Arguments: $1 - staging directory
#            $2 - bigWig path
#            $3 - text replacing "_diffH3_50bpbins.bw" in the bigWig file name
occupancy_out_path() {
    local bw_name=${2##*/}
    printf '%s/%s\n' "$1" "${bw_name/_diffH3_50bpbins.bw/$3}"
}

# Staging and result directories. The original passed the bigWigAverageOverBed
# binary path as an extra mkdir argument; only the real directories are
# created here. -p also creates Occupancy_regions itself and tolerates re-runs.
mkdir -p "$occupancy_dir/elaborate_scores"
mkdir -p "$occupancy_dir/bed_scores"
mkdir -p "$occupancy_dir/sorted_tr_bed_scores"
mkdir -p "$occupancy_dir/CONSOLIDATED_FILES_bed_scores/"

# bigWigAverageOverBed is invoked as ./bigWigAverageOverBed, so everything
# below depends on the working directory being $occupancy_dir.
if cd "$occupancy_dir"; then
    cp /data/shivaprasad/Softwares/bigWigAverageOverBed "$occupancy_dir"

    mapfile -t region_beds < <(find "$region_bed_dir/" -maxdepth 1 -type f -name "*.bed" | sort)
    # The bigWig list does not change between BED files, so it is read once.
    # -maxdepth precedes -type to avoid GNU find's option-order warning.
    mapfile -t score_bigwigs < <(find "$bigwig_dir/" -maxdepth 1 -type f -name "*.bw" | sort)

    for region_bed in "${region_beds[@]}"; do
        echo "Processing bedcoverage $region_bed"
        # NOTE(review): the pattern "p.bed" only matches file names ending in
        # "p.bed"; the TASK3 files end in "_diffreps.bed", so this directory may
        # be named after the unchanged BED file name - confirm the intended name.
        final_dir=$occupancy_dir/CONSOLIDATED_FILES_bed_scores/$(basename "${region_bed/p.bed/_FINAL_OUT}")

        for bw in "${score_bigwigs[@]}"; do
            echo "+++++Processing bedcoverage BEDFILE $region_bed for BIGWIG $bw ++++++"
            elab_score=$(occupancy_out_path "$occupancy_dir/elaborate_scores" "$bw" .tab)
            bed_score=$(occupancy_out_path "$occupancy_dir/bed_scores" "$bw" .bed)
            sorted_score=$(occupancy_out_path "$occupancy_dir/sorted_tr_bed_scores" "$bw" _srt_tr.bed)

            ./bigWigAverageOverBed "$bw" "$region_bed" "$elab_score" -bedOut="$bed_score"
            # Keep columns 1-3 and column 5 of the sorted -bedOut file.
            bedtools sort -i "$bed_score" | cut -f 1,2,3,5 > "$sorted_score"

            # -p: these directories already exist from the second bigWig onwards.
            mkdir -p "$final_dir/bedscores"
            mkdir -p "$final_dir/sorted_tr_bedscores"
            mkdir -p "$final_dir/elaborate_bedscores"

            # Empty the staging directories into this BED file's result directory.
            mv -v "$occupancy_dir"/elaborate_scores/* "$final_dir/elaborate_bedscores"
            mv -v "$occupancy_dir"/bed_scores/* "$final_dir/bedscores"
            mv -v "$occupancy_dir"/sorted_tr_bed_scores/* "$final_dir/sorted_tr_bedscores"

            echo "completed bedcoverage $region_bed"
        done
    done
else
    echo "ERROR: cannot cd to $occupancy_dir; skipping bigWigAverageOverBed scoring" >&2
fi

##Merging files
consolidated_root=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Occupancy_regions/CONSOLIDATED_FILES_bed_scores
consolidated_results_dir=/data/shivaprasad/Vivek/PolIV_ChIP_Seq_reanalysis/Files/Diffreps/Occupancy_regions/Consolidated_results

# Merge the score column (field 4) of every *.bed file in one directory into a
# single table, prefixed by the chr/start/end columns.
# Arguments:
#   $1 - directory holding the per-sample *_srt_tr.bed files
#   $2 - output table without header
#   $3 - output table with the header line prepended
# Details:
#   - Score columns appear in the shell's glob order of "$1"/*.bed.
#   - Coordinates are taken from nrpd1-kd_H3K4me3_Rep1_srt_tr.bed; rows are
#     paired by line number, so all files are assumed to list the same regions
#     in the same order.
#   - Output keeps the original layout: tabs between chr/start/end, single
#     spaces before each score, tabs in the header line.
#   - NOTE(review): the header names H3K4me3, H3K9me2, H3K27me3, PolII in that
#     order, but glob order would place "H3K27me3" before "H3K4me3" if the
#     files are all named like <genotype>_<mark>_RepN_srt_tr.bed - verify the
#     header against the actual file order.
# Returns: 0 on success, 1 if any step failed.
consolidate_scores() {
    local scores_dir=$1 merged=$2 merged_with_header=$3
    local work_dir=$scores_dir/consolidated
    local coords_tmp=$work_dir/temp_bed_file.txt
    local scores_tmp=$work_dir/temp_score_file.txt

    mkdir -p "$work_dir" || return 1

    # Join field 4 of line N from every file into one space-separated row.
    # "(FNR in a)" is used instead of testing a[FNR] for truth: a first score
    # of 0 is false in awk and would be overwritten by the next file's value,
    # silently dropping a column for that row.
    # Refer https://stackoverflow.com/questions/34523920/how-to-merge-specific-columns-from-many-files-in-one-file/34524125
    awk '{ a[FNR] = (FNR in a) ? a[FNR] " " $4 : $4; if (FNR > n) n = FNR }
         END { for (i = 1; i <= n; i++) print a[i] }' "$scores_dir"/*.bed > "$scores_tmp" || return 1

    cut -f 1,2,3 "$scores_dir/nrpd1-kd_H3K4me3_Rep1_srt_tr.bed" > "$coords_tmp" || return 1

    # Prefix every score row with the coordinates from the same line number.
    # Refer https://stackoverflow.com/questions/17095306/how-to-add-a-column-from-a-file-to-another-file
    awk 'NR==FNR{a[NR]=$0;next}{print a[FNR],$0}' "$coords_tmp" "$scores_tmp" > "$merged" || return 1

    {
        printf 'chr\tstart\tend\tH3K4me3_nrpd1_R1\tH3K4me3_nrpd1_R2\tH3K9me2_nrpd1_R1\tH3K9me2_nrpd1_R2\tH3K9me2_nrpd1_R3\tH3K9me2_nrpd1_R4\tH3K27me3_nrpd1_R1\tH3K27me3_nrpd1_R2\tPolII_nrpd1_R1\tPolII_nrpd1_R2\tH3K4me3_WT_R1\tH3K4me3_WT_R2\tH3K9me2_WT_R1\tH3K9me2_WT_R2\tH3K9me2_WT_R3\tH3K9me2_WT_R4\tH3K27me3_WT_R1\tH3K27me3_WT_R2\tPolII_WT_R1\tPolII_WT_R2\n'
        cat "$merged"
    } > "$merged_with_header" || return 1
}

mkdir -p "$consolidated_results_dir"

# -mindepth 1 keeps find from returning the root directory itself, which the
# loop would otherwise treat as one more result directory.
mapfile -t final_out_dirs < <(find "$consolidated_root/" -mindepth 1 -maxdepth 1 -type d | sort)

for dir in "${final_out_dirs[@]}"; do
    echo "processing $dir "
    scores_dir=$dir/sorted_tr_bedscores
    if [[ ! -d "$scores_dir" ]]; then
        echo "WARNING: $scores_dir not found; skipping" >&2
        continue
    fi

    # Strip a trailing _FINAL_OUT and append the result suffix. For names that
    # end in _FINAL_OUT this gives the same file names as before; for any other
    # name the two outputs stay distinct instead of collapsing onto one path.
    result_name=$(basename "$dir")
    result_name=${result_name%_FINAL_OUT}

    consolidate_scores "$scores_dir" \
        "$consolidated_results_dir/${result_name}_CONSOLIDATED.bed" \
        "$consolidated_results_dir/${result_name}_CONSOLIDATED_revised.bed" \
        || echo "ERROR: consolidation failed for $dir" >&2
done


