#!/bin/bash

### ChIP script for plotting figures with the latest choices for the manuscript (process the samples individually with Paper_ChIP_final.sh first)

############################################################################################################################################################################
### To merge the bam files for each sample

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
# do
	# case "$sample" in
		# MBD1) 	n=2
				# name1="ChIP3_MBD1"
				# name2="ChIP6_MBD1";;
		# MBD2)	n=2
				# name1="ChIP6_MBD2_rep1"
				# name2="ChIP6_MBD2_rep2";;
		# MBD4)	n=2
				# name1="ChIP8_MBD4_rep2"
				# name2="ChIP11_MBD4";;
		# MBD5)	n=2
				# name1="ChIP7_MBD5_rep1"
				# name2="ChIP7_MBD5_rep2";;
		# MBD6)	n=2
				# name1="ChIP3_MBD6"
				# name2="ChIP7_MBD6";;
		# SUVH1)	n=2
				# name1="ChIP2_SUVH1"
				# name2="ChIP11_SUVH1";;
		# SUVH3)	n=2
				# name1="ChIP7_SUVH3_rep1"
				# name2="ChIP7_SUVH3_rep2";;
		# WT)	n=1
			# name1="ChIP4_WT";;
	# esac
	# for type in HA INPUT
	# do
		# if [ $n == 2 ]
		# then
			# printf "\nmerging $n libraries $name1 and $name2 of $sample $type\n"
			# samtools merge -@10 manuscript/unsorted_${sample}_${type}.bam dedupbam/unique_${name1}_${type}.bam dedupbam/unique_${name2}_${type}.bam
		# else
			# printf "\nrenaming $n library $name1 of $sample $type\n"
			# cp dedupbam/unique_${name1}_${type}.bam manuscript/unsorted_${sample}_${type}.bam
		# fi
		# printf "\nsorting $sample $type\n"
		# samtools sort -@10 -O "bam" -T temp manuscript/unsorted_${sample}_${type}.bam -o manuscript/merged_${sample}_${type}.bam
		# printf "\nindexing $sample $type\n"
		# samtools index -@10 manuscript/merged_${sample}_${type}.bam
	# done
# done

# rm manuscript/unsorted*

#######################################################################################################################################################################
### To create a file of regions to avoid for peak calling (regions overrepresented in WT input) [FC of 4 chosen based on histogram (cf. R code)]

# printf "\nbamCoverage for WT into blacklisted regions\n"
# bamCoverage -b dedupbam/unique_ChIP4_WT_INPUT.bam -o manuscript/coverage_WT.bedGraph -of "bedgraph" --normalizeUsing "CPM"

# awk -v OFS="\t" '$1~/^[1-9]/ && $4>3' manuscript/coverage_WT.bedGraph > manuscript/filter_coverage_WT.bedgraph
# bedtools merge -i manuscript/filter_coverage_WT.bedgraph -d 1000 > manuscript/blacklisted_regions.bed

# rm manuscript/filter_coverage_WT.bedgraph


##########################################################################################################################################################################
### To remove alignments mapping to the blacklisted regions and call peaks with the filtered files using nomodel extsize 150

# source /home/jcahn/working_data/Runs/Chip/apps/macs2/bin/activate

# printf "Sample\tType\tInput\tFiltered\n" > manuscript/numbers_alignements.txt
# printf "Sample\tPeaks\tSummits\n" > manuscript/numbers_peaks_summits.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
# do
	# for type in HA INPUT
	# do
		# printf "\nfiltering out blacklisted regions from $sample $type bam\n"
		# bedtools intersect -v -abam manuscript/merged_${sample}_${type}.bam -b manuscript/blacklisted_regions.bed | samtools view -h -b - > manuscript/filtered_${sample}_${type}.bam
		# java -jar /home/jcahn/working_data/Install/picard.jar SortSam I=manuscript/filtered_${sample}_${type}.bam O=manuscript/${sample}_${type}.bam SORT_ORDER=coordinate CREATE_INDEX=TRUE
		# inp=$(samtools view -c manuscript/merged_${sample}_${type}.bam)
		# samtools view -c manuscript/${sample}_${type}.bam | awk -v OFS="\t" -v s=$sample -v t=$type -v i=$inp '{print s,t,i,$0" ("$0/i*100"%)"}' >> manuscript/numbers_alignements.txt
	# done
	# printf "\ncalling peaks for $sample\n"
	# macs2 callpeak -t manuscript/${sample}_HA.bam -c manuscript/${sample}_INPUT.bam -f BAM -g 1.2e8 -q 0.01 -n ${sample} --nomodel --extsize 150 --keep-dup "all" --call-summits --outdir manuscript/
	# printf "\ncalculating $sample stats\n"
	# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3,$10,$5-1,".",$8,$9}' manuscript/${sample}_peaks.xls > manuscript/summits_${sample}.bed
	# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3}' manuscript/${sample}_peaks.xls | uniq | awk -v OFS="\t" -v s=$sample '{print $0,s"_"NR}' > manuscript/peaks_${sample}.bed
	# nbp=$(wc -l manuscript/peaks_${sample}.bed | awk '{print $1}')
	# nbs=$(wc -l manuscript/summits_${sample}.bed | awk '{print $1}')
	# awk -v OFS="\t" -v s=$sample -v p=$nbp -v n=$nbs 'BEGIN {print s,p,n" ("n/p"/peak)"}' >> manuscript/numbers_peaks_summits.txt
# done

# rm manuscript/merged_*
# rm manuscript/filtered_*

# deactivate

# #####################################################################################################################################################################
# #### To call peaks for individual reps and compare numbers and overlap with merged files

# source /home/jcahn/working_data/Runs/Chip/apps/macs2/bin/activate

# printf "Sample\tType\tRep\tInput\tFiltered\n" > manuscript/individual/numbers_alignements_individual.txt
# printf "Sample\tRep\tPeaks\tSummits\tOverlap(TotvsInd)\tOverlap(IndvsTot)\n" > manuscript/individual/numbers_peaks_summits_individual.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	name1="ChIP3_MBD1"
				# name2="ChIP6_MBD1";;
		# MBD2)	name1="ChIP6_MBD2_rep1"
				# name2="ChIP6_MBD2_rep2";;
		# MBD4)	name1="ChIP8_MBD4_rep2"
				# name2="ChIP11_MBD4";;
		# MBD5)	name1="ChIP7_MBD5_rep1"
				# name2="ChIP7_MBD5_rep2";;
		# MBD6)	name1="ChIP3_MBD6"
				# name2="ChIP7_MBD6";;
		# SUVH1)	name1="ChIP2_SUVH1"
				# name2="ChIP11_SUVH1";;
		# SUVH3)	name1="ChIP7_SUVH3_rep1"
				# name2="ChIP7_SUVH3_rep2";;
	# esac
	# for type in HA INPUT
	# do
		# printf "\nfiltering out blacklisted regions from $name1 $sample $type bam\n"
		# bedtools intersect -v -abam dedupbam/unique_${name1}_${type}.bam -b manuscript/blacklisted_regions.bed | samtools view -h -b - > manuscript/individual/filtered_${sample}_${type}_rep1.bam
		# java -jar /home/jcahn/working_data/Install/picard.jar SortSam I=manuscript/individual/filtered_${sample}_${type}_rep1.bam O=manuscript/individual/${sample}_${type}_rep1.bam SORT_ORDER=coordinate CREATE_INDEX=TRUE
		# printf "\nfiltering out blacklisted regions from $name2 $sample $type bam\n"
		# bedtools intersect -v -abam dedupbam/unique_${name2}_${type}.bam -b manuscript/blacklisted_regions.bed | samtools view -h -b - > manuscript/individual/filtered_${sample}_${type}_rep2.bam
		# java -jar /home/jcahn/working_data/Install/picard.jar SortSam I=manuscript/individual/filtered_${sample}_${type}_rep2.bam O=manuscript/individual/${sample}_${type}_rep2.bam SORT_ORDER=coordinate CREATE_INDEX=TRUE
		# inp1=$(samtools view -c dedupbam/unique_${name1}_${type}.bam)
		# inp2=$(samtools view -c manuscript/individual/${sample}_${type}_rep1.bam)
		# inp3=$(samtools view -c dedupbam/unique_${name2}_${type}.bam)
		# inp4=$(samtools view -c manuscript/individual/${sample}_${type}_rep2.bam)
		# awk -v OFS="\t" -v s=$sample -v t=$type -v i=$inp1 -v j=$inp2 -v k=$inp3 -v l=$inp4 'BEGIN {print s,t,"rep1",i,j" ("j/i*100"%)\n"s,t,"rep2",k,l" ("l/k*100"%)"}' >> manuscript/individual/numbers_alignements_individual.txt
	# done
	# tot=$(wc -l manuscript/peaks_${sample}.bed | awk '{print $1}')
	# for rep in rep1 rep2
	# do
		# printf "\ncalling peaks for $sample\n"
		# macs2 callpeak -t manuscript/individual/${sample}_HA_${rep}.bam -c manuscript/individual/${sample}_INPUT_${rep}.bam -f BAM -g 1.2e8 -q 0.01 -n ${sample}_${rep} --keep-dup "all" --call-summits --outdir manuscript/individual/ --nomodel --extsize 150
		# echo "calculating stats"
		# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3,$10,$5-1,".",$8,$9}' manuscript/individual/${sample}_${rep}_peaks.xls > manuscript/individual/summits_${sample}_${rep}.bed
		# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3}' manuscript/individual/${sample}_${rep}_peaks.xls | uniq | awk -v OFS="\t" -v s=$sample -v r=$rep '{print $0,s"_"r"_"NR}' > manuscript/individual/peaks_${sample}_${rep}.bed
		# over1=$(bedtools intersect -wa -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_${rep}.bed | wc -l | awk '{print $1}')
		# over2=$(bedtools intersect -wa -a manuscript/individual/peaks_${sample}_${rep}.bed -b manuscript/peaks_${sample}.bed | wc -l | awk '{print $1}')
		# nbp=$(wc -l manuscript/individual/peaks_${sample}_${rep}.bed | awk '{print $1}')
		# nbs=$(wc -l manuscript/individual/summits_${sample}_${rep}.bed | awk '{print $1}')
		# awk -v OFS="\t" -v s=$sample -v r=$rep -v p=$nbp -v n=$nbs -v o=$over1 -v x=$over2 -v t=$tot 'BEGIN {print s,r,p,n" ("n/p"/peak)",o" ("o/t*100"%)",x" ("x/p*100"%)"}' >> manuscript/individual/numbers_peaks_summits_individual.txt
	# done
# done

# rm manuscript/individual/filtered_*

# deactivate

###############################################################################################################################
## To get overlapping percentages between merged and each replicate by intersecting 10bp binned genome called as peaks or not
###############################################################################################################################

### To bin the genome in 10bp bins (not including the last bin)
### Reads "<chromosome> <length>" pairs from the chromosome sizes file and appends one line per full 10bp bin.
### Because the bins are appended (>>), the previous output is removed first, at the same path the loop
### writes to, so that rerunning this section does not duplicate every bin.

# rm -f manuscript/binned/10bp_genome_bin.bed
# while read chrom max
# do
	# echo "chromosome $chrom"
	# awk -v OFS="\t" -v c=$chrom -v m=$max 'BEGIN {a=0; b=10; while (b<=m) {print c, a, b; a=a+10; b=b+10}}' >> manuscript/binned/10bp_genome_bin.bed
	# tail manuscript/binned/10bp_genome_bin.bed
# done < ChromHMM/CHROMSIZES/tair10_nuc.txt

### To intersect with each sample and get percentage

# printf "Sample\tmerged\trep1\trep2\tmerged_rep1\trep1_rep2\tmerged_rep2\tmerged_rep1_rep2\n" > manuscript/binned/intersections_percentages2.txt
# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# bedtools intersect -wa -a manuscript/binned/10bp_genome_bin.bed -b manuscript/peaks_${sample}.bed > manuscript/binned/${sample}_merged.bed
	# a=$(wc -l manuscript/binned/${sample}_merged.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/10bp_genome_bin.bed -b manuscript/individual/peaks_${sample}_rep1.bed > manuscript/binned/${sample}_rep1.bed
	# b=$(wc -l manuscript/binned/${sample}_rep1.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/10bp_genome_bin.bed -b manuscript/individual/peaks_${sample}_rep2.bed > manuscript/binned/${sample}_rep2.bed
	# c=$(wc -l manuscript/binned/${sample}_rep2.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/${sample}_merged.bed -b manuscript/binned/${sample}_rep1.bed > manuscript/binned/${sample}_merged_plus_rep1.bed
	# d=$(wc -l manuscript/binned/${sample}_merged_plus_rep1.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/${sample}_rep1.bed -b manuscript/binned/${sample}_rep2.bed > manuscript/binned/${sample}_rep1_plus_rep2.bed
	# e=$(wc -l manuscript/binned/${sample}_rep1_plus_rep2.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/${sample}_merged.bed -b manuscript/binned/${sample}_rep2.bed > manuscript/binned/${sample}_merged_plus_rep2.bed
	# f=$(wc -l manuscript/binned/${sample}_merged_plus_rep2.bed | awk '{print $1}')
	# bedtools intersect -wa -a manuscript/binned/${sample}_merged_plus_rep1.bed -b manuscript/binned/${sample}_rep2.bed > manuscript/binned/${sample}_merged_plus_rep1_plus_rep2.bed
	# g=$(wc -l manuscript/binned/${sample}_merged_plus_rep1_plus_rep2.bed | awk '{print $1}')
	# awk -v OFS="\t" -v s=$sample -v a=$a -v b=$b -v c=$c -v d=$d -v e=$e -v f=$f -v g=$g 'BEGIN {print s, a, b, c, d, e, f, g}' >> manuscript/binned/intersections_percentages2.txt
# done

# cat manuscript/binned/intersections_percentages2.txt

###############################################################################################################################
## To get files of annotated peaks for individual replicates and merged samples
###############################################################################################################################

# ### For each replicate

# awk -v OFS="\t" '$3~"transposable_element" {print $0}' annotations/TAIR10_GFF3_genes_transposons.gff | sed 's/Chr//' > annotations/TE.gff
# printf "Sample\tAnnotations\tCount\n" > manuscript/annotations/annotated_peaks_stats_ind.txt
# for sample in MBD1_rep1 MBD1_rep2 MBD2_rep1 MBD2_rep2 MBD4_rep1 MBD4_rep2 MBD5_rep1 MBD5_rep2 MBD6_rep1 MBD6_rep2 SUVH1_rep1 SUVH1_rep2 SUVH3_rep1 SUVH3_rep2
# do
	# echo $sample
	# annotatePeaks.pl manuscript/individual/peaks_${sample}.bed tair10 -gff annotations/TAIR10_GFF3_genes_transposons.gff > manuscript/annotations/tempA_peaks.txt
	# awk -v OFS="\t" '(NR>1) && ($9=="Intergenic" || $9=="Chr") {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="promoter-TSS" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1b_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="TTS" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1c_peaks.bed
	# awk -v OFS="\t" '(NR>1) && ($9=="exon" || $9=="3'"'"'" || $9=="5'"'"'") {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1d_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="intron" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1e_peaks.bed
	# bedtools intersect -a manuscript/annotations/temp1_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2.txt
	# bedtools intersect -a manuscript/annotations/temp1b_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2b.txt
	# bedtools intersect -a manuscript/annotations/temp1c_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2c.txt
	# bedtools intersect -a manuscript/annotations/temp1d_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2d.txt
	# bedtools intersect -a manuscript/annotations/temp1e_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2e.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' manuscript/annotations/temp2.txt > manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' manuscript/annotations/temp2b.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' manuscript/annotations/temp2c.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' manuscript/annotations/temp2d.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' manuscript/annotations/temp2e.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' manuscript/annotations/temp3.txt > manuscript/annotations/temp4.txt
	# sort -k1,1 -k2,2n manuscript/annotations/temp4.txt | uniq | awk -v OFS="\t" -v s=$sample '{print $4,$5}' | sort -k1,1 > manuscript/annotations/annotated_peaks_${sample}.txt
	# head manuscript/annotations/annotated_peaks_${sample}.txt
	# awk -v OFS="\t" '{print $2}' manuscript/annotations/annotated_peaks_${sample}.txt | sort | uniq -c | awk -v OFS="\t" -v s=$sample '{print s,$2,$1}' >> manuscript/annotations/annotated_peaks_stats_ind.txt
	# rm manuscript/annotations/temp*
# done

# cat manuscript/annotations/annotated_peaks_stats_ind.txt

# ### For merged files

# awk -v OFS="\t" '$3~"transposable_element" {print $0}' annotations/TAIR10_GFF3_genes_transposons.gff | sed 's/Chr//' > annotations/TE.gff
# printf "Sample\tAnnotations\tCount\n" > manuscript/annotations/annotated_peaks_stats_merged.txt
# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# echo $sample
	# annotatePeaks.pl manuscript/peaks_${sample}.bed tair10 -gff annotations/TAIR10_GFF3_genes_transposons.gff > manuscript/annotations/tempA_peaks.txt
	# awk -v OFS="\t" '(NR>1) && ($9=="Intergenic" || $9=="Chr") {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="promoter-TSS" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1b_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="TTS" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1c_peaks.bed
	# awk -v OFS="\t" '(NR>1) && ($9=="exon" || $9=="3'"'"'" || $9=="5'"'"'") {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1d_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $9=="intron" {print $2,$3,$4,$1}' manuscript/annotations/tempA_peaks.txt | sort -k1,1 -k2,2n > manuscript/annotations/temp1e_peaks.bed
	# bedtools intersect -a manuscript/annotations/temp1_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2.txt
	# bedtools intersect -a manuscript/annotations/temp1b_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2b.txt
	# bedtools intersect -a manuscript/annotations/temp1c_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2c.txt
	# bedtools intersect -a manuscript/annotations/temp1d_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2d.txt
	# bedtools intersect -a manuscript/annotations/temp1e_peaks.bed -b annotations/TE.gff -loj | sort -k1,1 -k2,2n > manuscript/annotations/temp2e.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' manuscript/annotations/temp2.txt > manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' manuscript/annotations/temp2b.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' manuscript/annotations/temp2c.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' manuscript/annotations/temp2d.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' manuscript/annotations/temp2e.txt >> manuscript/annotations/temp3.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' manuscript/annotations/temp3.txt > manuscript/annotations/temp4.txt
	# sort -k1,1 -k2,2n manuscript/annotations/temp4.txt | uniq | awk -v OFS="\t" -v s=$sample '{print $4,$5}' | sort -k1,1 > manuscript/annotations/annotated_peaks_${sample}.txt
	# head manuscript/annotations/annotated_peaks_${sample}.txt
	# awk -v OFS="\t" '{print $2}' manuscript/annotations/annotated_peaks_${sample}.txt | sort | uniq -c | awk -v OFS="\t" -v s=$sample '{print s,$2,$1}' >> manuscript/annotations/annotated_peaks_stats_merged.txt
	# rm manuscript/annotations/temp*
# done

# cat manuscript/annotations/annotated_peaks_stats_merged.txt

############################################################################################################################################################
#### To get distance of peaks to centromeres (as percentage of chromosome arm length)
############################################################################################################################################################

### For individual reps

# printf "1\t14511722\t14803970\n2\t3611839\t3633423\n3\t13589757\t13867121\n4\t3133664\t3133674\n5\t11194538\t11723210\n" > manuscript/distance/centromere_positions.bed

# printf "Sample\tRep\tPeak_ID\tDistance\n" > manuscript/distance/distance_to_centromeres_individual2.txt
# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# for rep in rep1 rep2
	# do
		# for chr in 1 2 3 4 5
		# do
			# cs=$(awk -v c=$chr '$1==c {print $2}' manuscript/distance/centromere_positions.bed)
			# ce=$(awk -v c=$chr '$1==c {print $3}' manuscript/distance/centromere_positions.bed)
			# max=$(awk -v c=$chr '$1==c {print $2}' ChromHMM/CHROMSIZES/tair10_nuc.txt)
			# cat manuscript/individual/peaks_${sample}_${rep}.bed | awk -v OFS="\t" -v c=$chr -v a=$cs -v b=$ce -v m=$max -v s=$sample -v r=$rep '$1==c {if ($3<a) d=((a-$3)/(a-1))*100; else if ($2>b) d=(($2-b)/(m-b))*100; else d=0; print s,r,s"_"r"_peaks_"NR,d}' >> manuscript/distance/distance_to_centromeres_individual2.txt
		# done
	# done
# done

# ### For merged samples

# printf "Sample\tPeak_ID\tDistance\n" > manuscript/distance/distance_to_centromeres_merged.txt
# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# for chr in 1 2 3 4 5
	# do
		# cs=$(awk -v c=$chr '$1==c {print $2}' manuscript/distance/centromere_positions.bed)
		# ce=$(awk -v c=$chr '$1==c {print $3}' manuscript/distance/centromere_positions.bed)
		# max=$(awk -v c=$chr '$1==c {print $2}' ChromHMM/CHROMSIZES/tair10_nuc.txt)
		# cat manuscript/peaks_${sample}.bed | awk -v OFS="\t" -v c=$chr -v a=$cs -v b=$ce -v m=$max -v s=$sample '$1==c {if ($3<a) d=((a-$3)/(a-1))*100; else if ($2>b) d=(($2-b)/(m-b))*100; else d=0; print s,s"_peaks_"NR,d}' >> manuscript/distance/distance_to_centromeres_merged.txt
	# done
# done


############################################################################################################################################################
#### To get average methylation at each peak for merged samples
############################################################################################################################################################

# source /home/tstuart/working_data/virtualenvs/env/bin/activate

# python find_methylation_at_ChIPseq_peaks.py

# deactivate

# #### to merge the methylation data into one table for each sample

# printf "Sample\tPeak_ID\tmutant\tmCG\tCov_mCG\tNb_mCG\tmCHG\tCov_mCHG\tNb_mCHG\tmCHH\tCov_mCHH\tNb_mCHH\n" > manuscript/methyl/all_peaks_mC.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# for mutant in WT_seedlings
	# do
		# sed 's/\r//' manuscript/methyl/mC_${sample}_${mutant}_CG.tsv | sort -k1,1 > manuscript/methyl/interCG_${sample}_met_${mutant}.tsv
		# sed 's/\r//' manuscript/methyl/mC_${sample}_${mutant}_CHG.tsv | sort -k1,1 > manuscript/methyl/interCHG_${sample}_met_${mutant}.tsv
		# sed 's/\r//' manuscript/methyl/mC_${sample}_${mutant}_CHH.tsv | sort -k1,1 > manuscript/methyl/interCHH_${sample}_met_${mutant}.tsv
		# join manuscript/methyl/interCG_${sample}_met_${mutant}.tsv manuscript/methyl/interCHG_${sample}_met_${mutant}.tsv | join - manuscript/methyl/interCHH_${sample}_met_${mutant}.tsv | awk -v OFS="\t" -v m=$mutant '{print $1,m,$2,$3,$4,$5,$6,$7,$8,$9,$10}' | sort -k1,1 >> manuscript/methyl/tot_mC_${sample}.txt
		# tail manuscript/methyl/tot_mC_${sample}.txt
	# done
	# sort -k1,1 manuscript/methyl/tot_mC_${sample}.txt > manuscript/methyl/peaks_mC_${sample}.txt
	# awk -v OFS="\t" -v s=$sample '{print s,$0}' manuscript/methyl/peaks_mC_${sample}.txt >> manuscript/methyl/all_peaks_mC.txt
# done

# rm manuscript/methyl/inter*
# rm manuscript/methyl/tot_mC*

############################################################################################################################################################
#### To plot deeptools heatmaps and metaplots
############################################################################################################################################################

#### creating bigwig files for ChIP

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
# do
	# printf "ChIP bigwig $sample\n"
	# bamCompare -b1 manuscript/${sample}_HA.bam -b2 manuscript/${sample}_INPUT.bam -o manuscript/${sample}.bw -p 5 --binSize 1 --scaleFactorsMethod "None" --normalizeUsing CPM
# done

#### creating bigwig files for ChIP individual replicates

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# printf "ChIP bigwig $sample\n"
	# bamCompare -b1 manuscript/individual/${sample}_HA_rep1.bam -b2 manuscript/individual/${sample}_INPUT_rep1.bam -o manuscript/${sample}_rep1.bw -p 5 --binSize 1 --scaleFactorsMethod "None" --normalizeUsing CPM
	# bamCompare -b1 manuscript/individual/${sample}_HA_rep2.bam -b2 manuscript/individual/${sample}_INPUT_rep2.bam -o manuscript/${sample}_rep2.bw -p 5 --binSize 1 --scaleFactorsMethod "None" --normalizeUsing CPM
# done

############################################################################################################################################################
#### To plot comparison between ChIP and DAP for MBD5 and MBD6
############################################################################################################################################################

#### After running DAPseq pipeline

# for sample in MBD5 MBD6
# do
	# printf "\ngetting peaks for $sample\n"
	# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$5-1,$5,$10}' manuscript/DAP/${sample}_DAP_peaks.xls > manuscript/DAP/summits_${sample}_DAP.bed
	# printf "computing DAP matrix for $sample...\n"
	# computeMatrix reference-point --referencePoint "TSS" -R manuscript/DAP/summits_${sample}_DAP.bed -S manuscript/DAP/DAP_${sample}.bw manuscript/DAP/ampDAP_${sample}.bw manuscript/${sample}.bw -bs 50 -a 2000 -b 2000 -p 10 --skipZeros -o manuscript/DAP/matrix_${sample}_summits.gz
	# printf "plotting heatmaps for $sample...\n"
	# plotHeatmap -m manuscript/DAP/matrix_${sample}_summits.gz -out manuscript/DAP/heatmap_summits_${sample}.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.9 --colorMap 'coolwarm' --refPointLabel 'Peak summit'
	# plotHeatmap -m manuscript/DAP/matrix_${sample}_summits.gz -out manuscript/DAP/heatmap_summits_${sample}v2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.9 --colorMap 'seismic' --refPointLabel 'Peak summit'
# done

#### To compare MBD5 and MBD6

# printf "computing DAP matrix...\n"
# computeMatrix reference-point --referencePoint "TSS" -R manuscript/DAP/summits_MBD5_DAP.bed -S manuscript/DAP/DAP_MBD5.bw manuscript/DAP/ampDAP_MBD5.bw manuscript/MBD5.bw manuscript/DAP/DAP_MBD6.bw manuscript/DAP/ampDAP_MBD6.bw manuscript/MBD6.bw -bs 50 -a 2000 -b 2000 -p 10 --skipZeros -o manuscript/DAP/matrix_MBD5_6_summits.gz
# printf "plotting heatmaps...\n"
# plotHeatmap -m manuscript/DAP/matrix_MBD5_6_summits.gz -out manuscript/DAP/heatmap_summits_MBD5_6.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.9 --colorMap 'coolwarm' --refPointLabel 'Peak summit'

# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3,$10}' manuscript/DAP/MBD5_DAP_peaks.xls > manuscript/DAP/MBD5_DAP_peaks.bed
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2-1,$3,$10}' manuscript/DAP/MBD6_DAP_peaks.xls > manuscript/DAP/MBD6_DAP_peaks.bed
# printf "MBD5 total peaks: "
# wc -l manuscript/DAP/MBD5_DAP_peaks.bed ## 9266
# printf "MBD6 total peaks: "
# wc -l manuscript/DAP/MBD6_DAP_peaks.bed ## 10284
# printf "MBD5 MBD6 peaks: "
# bedtools intersect -wa -a manuscript/DAP/MBD5_DAP_peaks.bed -b manuscript/DAP/MBD6_DAP_peaks.bed | wc -l ## 5663


############################################################################################################################################################
#### making heatmaps at GbM genes
############################################################################################################################################################

#### Get methylation at all protein coding genes

# source /home/tstuart/working_data/virtualenvs/env/bin/activate
# python find_methylation_at_bedfiles.py
# deactivate

# #### Split into pseudogenes (methylated in mCHG and mCHH, >2%), GbM genes (>5% mCG) and unmethylated genes

# sed 's/\r//' manuscript/methyl/mC_protein_coding_gene_WT_seedlings_CG.tsv | sort -k1,1 > manuscript/methyl/interCG_protein_coding_gene.tsv
# sed 's/\r//' manuscript/methyl/mC_protein_coding_gene_WT_seedlings_CHG.tsv | sort -k1,1 > manuscript/methyl/interCHG_protein_coding_gene.tsv
# sed 's/\r//' manuscript/methyl/mC_protein_coding_gene_WT_seedlings_CHH.tsv | sort -k1,1 > manuscript/methyl/interCHH_protein_coding_gene.tsv
# join manuscript/methyl/interCG_protein_coding_gene.tsv manuscript/methyl/interCHG_protein_coding_gene.tsv | join - manuscript/methyl/interCHH_protein_coding_gene.tsv | awk -v OFS="\t" '{if ($5+$8>2) print $1 > "manuscript/annotations/inter_mC_pseudogenes.txt"; else if ($2>5) print $1 > "manuscript/annotations/inter_mC_GbM_genes.txt"; else print $1 > "manuscript/annotations/inter_mC_unmethylated.txt"; }'

# wc -l manuscript/annotations/all_protein_coding_gene.bed > manuscript/annotations/stats_genes_methylation.txt
# sort -k4,4 manuscript/annotations/all_protein_coding_gene.bed > manuscript/annotations/inter_allgenes.bed

# for type in GbM_genes pseudogenes unmethylated
# do
	# ### get number of genes in each category
	# join -1 1 -2 4 manuscript/annotations/inter_mC_${type}.txt manuscript/annotations/inter_allgenes.bed | awk -v OFS="\t" '{print $2,$3,$4,$1,$5,$6}' > manuscript/annotations/mC_${type}.bed
	# wc -l manuscript/annotations/mC_${type}.bed >> manuscript/annotations/stats_genes_methylation.txt
	# cat manuscript/annotations/stats_genes_methylation.txt
# done

############### plotting GbM heatmap

# ### Build the region label "GbM_genes(<count>)" from the GbM line of the gene stats file.
# ### The stats file is the one written by the per-category loop, i.e. under manuscript/annotations/.
# label=$(grep "GbM" manuscript/annotations/stats_genes_methylation.txt | awk '{print "GbM_genes("$1")"}' )

# printf "computing mC matrix for GbM genes\n"
# computeMatrix scale-regions -R manuscript/annotations/mC_GbM_genes.bed -S manuscript/methyl/CG_WT.bw manuscript/methyl/CHG_WT.bw manuscript/methyl/CHH_WT.bw -bs 50 -b 0 -a 0 -m 2000 -p 10 -o manuscript/deeptools/matrix_GbM_mC.gz
# printf "Plotting mC heatmap for GbM genes\n"
# plotHeatmap -m manuscript/deeptools/matrix_GbM_mC.gz -out manuscript/paper_plots/heatmap_GbM_mC.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 1 --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel CG CHG CHH --regionsLabel $label --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_GbM.txt --zMin 0 --zMax 100

# printf "computing ChIP matrix for GbM genes\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_GbM.txt -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 0 -a 0 -m 2000 -p 10 -o manuscript/deeptools/matrix_GbM_ChIP.gz
# printf "Plotting ChIP heatmap for GbM genes\n"
# plotHeatmap -m manuscript/deeptools/matrix_GbM_ChIP.gz -out manuscript/paper_plots/heatmap_GbM_ChIP.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'seismic' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1.5 --zMax 1.5 --whatToShow "heatmap and colorbar"

############################################################################################################################################################
#### To plot heatmaps at clustered peaks based on ChIP signal in all samples (see code below for how clustering was made)
############################################################################################################################################################

# awk -v OFS="\t" '{print $1,$2,$3}' manuscript/deeptools/final_MBD4_peaks.bed > manuscript/deeptools/tempAL.bed 
# cat manuscript/deeptools/final_common_peaks.bed manuscript/deeptools/tempAL.bed | sort -k1,1n -k2,2n > manuscript/deeptools/finalv2_common_peaks.bed
# cp manuscript/deeptools/final_MBD1_peaks.bed manuscript/deeptools/finalv2_MBD1_peaks.bed
# cp manuscript/deeptools/final_MBD2_5_6_peaks.bed manuscript/deeptools/finalv2_MBD2_5_6_peaks.bed
# cp manuscript/deeptools/final_MBD5_6_peaks.bed manuscript/deeptools/finalv2_MBD5_6_peaks.bed
# cp manuscript/deeptools/final_MBD5_6_SUVH1_3_peaks.bed manuscript/deeptools/finalv2_MBD5_6_SUVH1_3_peaks.bed

# rm manuscript/deeptools/regions_labels.txt
# for file in manuscript/deeptools/finalv2_*.bed
# do
	# namext=${file##*/finalv2_}
	# name=${namext%_peaks.bed}
	# wc -l $file | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/regions_labels.txt
# done
# clustername=$(cat manuscript/deeptools/regions_labels.txt)
	
# printf "computing matrix part1 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/finalv2_*_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf1.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf1.gz -out manuscript/paper_plots/heatmap_grouped_vf1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_grouped_peaks.bed

# ### plotting final heatmap of mC over the 5 clusters

# printf "computing matrix part2 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_grouped_peaks.bed -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf2.gz
# printf "plotting heatmap part2 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf2.gz -out manuscript/paper_plots/heatmap_grouped_vf2.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' 'Oranges' 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel mCG mCHG mCHH --regionsLabel ${clustername} --zMin 0 --zMax 100 80 30 --whatToShow "heatmap and colorbar"

# #########################################################################################
# ### To get annotation files of grouped peaks
# #########################################################################################

# #### randomize the common peak file to see if some annotations are overrepresented

# bedtools shuffle -i manuscript/deeptools/finalv2_common_peaks.bed -g manuscript/annotations/ath.genome | sort -k1,1n -k2,2n > manuscript/deeptools/nalv2_random_peaks.bed

# ###

# printf "Group\tPeak_ID\tType\tAGI\tTE_ID\n" > manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
# for file in manuscript/deeptools/*alv2_*_peaks.bed
# do
	# namext=${file##*/*alv2_}
	# name=${namext%.bed}
	# printf "$name\n"
	# awk -v OFS="\t" -v n=$name '{print $1,$2,$3,n"_"NR,".","."}' $file > manuscript/deeptools/temp0_${name}.bed
	# bedtools intersect -wo -a manuscript/deeptools/temp0_${name}.bed -b manuscript/annotations/all_protein_coding_gene.bed | awk -v OFS="\t" -v n=$name '{print n,$4, "Protein_Coding_Gene",$10,"NA"}' >> manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
	# bedtools intersect -v -a manuscript/deeptools/temp0_${name}.bed -b manuscript/annotations/all_protein_coding_gene.bed > manuscript/deeptools/temp1_${name}.bed
	# bedtools intersect -wo -a manuscript/deeptools/temp1_${name}.bed -b manuscript/annotations/all_RNAs.bed -wo | awk -v OFS="\t" -v n=$name '{print n,$4,"RNA_gene",$10,"NA"}' >> manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
	# bedtools intersect -v -a manuscript/deeptools/temp1_${name}.bed -b manuscript/annotations/all_RNAs.bed > manuscript/deeptools/temp2_${name}.bed
	# bedtools intersect -wo -a manuscript/deeptools/temp2_${name}.bed -b manuscript/annotations/all_TE_genes.bed | awk -v OFS="\t" -v n=$name '{print n,$4,"TE_gene",$10,"NA"}' >> manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
	# bedtools intersect -v -a manuscript/deeptools/temp2_${name}.bed -b manuscript/annotations/all_TE_genes.bed > manuscript/deeptools/temp3_${name}.bed
	# bedtools intersect -wo -a manuscript/deeptools/temp3_${name}.bed -b manuscript/annotations/all_TEs.bed | awk -v OFS="\t" -v n=$name '{print n,$4,"TE",$10}' | awk -F"[|]" -v OFS="\t" '{print $1,$2}' >> manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
	# bedtools intersect -v -a manuscript/deeptools/temp3_${name}.bed -b manuscript/annotations/all_TEs.bed | awk -v OFS="\t" -v n=$name '{print n,$4,"Intergenic","NA","NA"}' >> manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt
# done

# rm manuscript/deeptools/temp*

# awk 'NR > 1 {print $1,$3}' manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt | sort | uniq -c | awk -v OFS="\t" '{print $2,$3,$1}'> manuscript/deeptools/annotated_grouped_peaks_statsv2.txt

########## To remove AGI already present in a cluster (common > MBD2_5_6 > MBD1 > MBD5_6 > MBD5_6_SUVH1_SUVH3)

# rm manuscript/deeptools/*_protein_coding_gene.bed
# rm manuscript/deeptools/*temp.txt

# ### To get AGI intermediate files for each group peaks
# for group in common_peaks MBD1_peaks MBD2_5_6_peaks MBD5_6_SUVH1_3_peaks MBD5_6_peaks
# do
	# awk -v OFS="\t" -v g=$group '$1 == g && $3 == "Protein_Coding_Gene" {print $1,$4}' manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt | sort | uniq > manuscript/deeptools/${group}_AGI_temp.txt
# done

# ### To get gene bed files of common peaks
# while read name AGI
# do
	# grep -w $AGI manuscript/annotations/all_protein_coding_gene.bed >> manuscript/deeptools/common_peaks_protein_coding_gene.bed
# done < manuscript/deeptools/common_peaks_AGI_temp.txt

# ### To get gene bed files of MBD2/5/6 peaks (removing ones called in common peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD256\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_protein_coding_gene.bed >> manuscript/deeptools/MBD2_5_6_peaks_protein_coding_gene.bed
	# fi
# done < manuscript/deeptools/MBD2_5_6_peaks_AGI_temp.txt

# ### To get gene bed files of MBD1 peaks (removing ones called in common and MBD2/5/6 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD1\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD1\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_protein_coding_gene.bed >> manuscript/deeptools/MBD1_peaks_protein_coding_gene.bed
	# fi
# done < manuscript/deeptools/MBD1_peaks_AGI_temp.txt

# ### To get gene bed files of MBD5/6 peaks (removing ones called in common, MBD2/5/6 and MBD1 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD56\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD56\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD1_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tMBD56\tMBD1\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_protein_coding_gene.bed >> manuscript/deeptools/MBD5_6_peaks_protein_coding_gene.bed
	# fi
# done < manuscript/deeptools/MBD5_6_peaks_AGI_temp.txt

# ### To get gene bed files of MBD5/6/SUVH1/3 peaks (removing ones called in common, MBD2/5/6, MBD1 and MBD5/6 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tSUVH13\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tSUVH13\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD1_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tSUVH13\tMBD1\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD5_6_peaks_protein_coding_gene.bed
	# then
		# printf "$AGI\tSUVH13\tMBD56\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_protein_coding_gene.bed >> manuscript/deeptools/MBD5_6_SUVH1_3_peaks_protein_coding_gene.bed
	# fi
# done < manuscript/deeptools/MBD5_6_SUVH1_3_peaks_AGI_temp.txt

# ### To check that the numbers match
# printf "common peaks AGI:\n" 
# wc -l manuscript/deeptools/common_peaks_AGI_temp.txt
# wc -l manuscript/deeptools/common_peaks_protein_coding_gene.bed

# printf "\nMBD256 peaks AGI:\n" 
# wc -l manuscript/deeptools/MBD2_5_6_peaks_AGI_temp.txt
# wc -l manuscript/deeptools/MBD2_5_6_peaks_protein_coding_gene.bed
# printf "MBD256 peaks in common:\n" 
# awk '$2 == "MBD256" && $3 == "common"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l

# printf "\nMBD1 peaks AGI:\n" 
# wc -l manuscript/deeptools/MBD1_peaks_AGI_temp.txt
# wc -l manuscript/deeptools/MBD1_peaks_protein_coding_gene.bed
# printf "MBD1 peaks in common:\n" 
# awk '$2 == "MBD1" && $3 == "common"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD1 peaks in MBD256:\n" 
# awk '$2 == "MBD1" && $3 == "MBD256"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l

# printf "\nMBD56 peaks AGI:\n" 
# wc -l manuscript/deeptools/MBD5_6_peaks_AGI_temp.txt
# wc -l manuscript/deeptools/MBD5_6_peaks_protein_coding_gene.bed
# printf "MBD56 peaks in common:\n" 
# awk '$2 == "MBD56" && $3 == "common"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD56 peaks in MBD256:\n" 
# awk '$2 == "MBD56" && $3 == "MBD256"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD56 peaks in MBD1:\n" 
# awk '$2 == "MBD56" && $3 == "MBD1"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l

# printf "\nMBD56SUVH13 peaks AGI:\n" 
# wc -l manuscript/deeptools/MBD5_6_SUVH1_3_peaks_AGI_temp.txt
# wc -l manuscript/deeptools/MBD5_6_SUVH1_3_peaks_protein_coding_gene.bed
# printf "MBD56/SUVH13 peaks in common:\n" 
# awk '$2 == "SUVH13" && $3 == "common"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD56/SUVH13 peaks in MBD256:\n" 
# awk '$2 == "SUVH13" && $3 == "MBD256"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD56/SUVH13 peaks in MBD1:\n"
# awk '$2 == "SUVH13" && $3 == "MBD1"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l
# printf "MBD56/SUVH13 peaks in MBD56:\n"
# awk '$2 == "SUVH13" && $3 == "MBD56"' manuscript/deeptools/two_groups_common_peaks_temp.txt | wc -l

# rm manuscript/deeptools/*temp.txt

# rm manuscript/deeptools/protein_coding_gene_labels.txt
# for file in manuscript/deeptools/*_peaks_protein_coding_gene.bed
# do
	# namext=${file##*/}
	# name=${namext%_protein_coding_gene.bed}
	# wc -l $file | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/protein_coding_gene_labels.txt
# done
# clustername=$(cat manuscript/deeptools/protein_coding_gene_labels.txt)

# printf "computing matrix part1 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/*_peaks_protein_coding_gene.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/grouped_protein_coding_gene_vf1.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_protein_coding_gene_vf1.gz -out manuscript/paper_plots/heatmap_protein_coding_genes_vf1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_protein_coding_genes.txt

# # #### getting mC heatmap

# printf "computing methylation matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_protein_coding_genes.txt -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_mC.gz
# printf "plotting methylation profiles for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/matrix_grouped_peaks_mC.gz -out manuscript/paper_plots/heatmap_protein_coding_genes_mC.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel mCG mCHG mCHH --regionsLabel ${clustername} --zMin 0 --zMax 100 80 30 --whatToShow "heatmap and colorbar"

# #### getting heatmap with replicates and WT

# printf "computing matrix part1 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_protein_coding_genes.txt -S manuscript/MBD1_rep*.bw manuscript/MBD2_rep*.bw manuscript/MBD4_rep*.bw manuscript/MBD5_rep*.bw manuscript/MBD6_rep*.bw manuscript/SUVH1_rep*.bw manuscript/SUVH3_rep*.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/grouped_protein_coding_gene_vf_rep.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_protein_coding_gene_vf_rep.gz -out manuscript/paper_plots/heatmap_protein_coding_genes_vf_rep.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1_rep1 MBD1_rep2 MBD2_rep1 MBD2_rep2 MBD4_rep1 MBD4_rep2 MBD5_rep1 MBD5_rep2 MBD6_rep1 MBD6_rep2 SUVH1_rep1 SUVH1_rep2 SUVH3_rep1 SUVH3_rep2 WT RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar"

# #### getting distance to centromeres

# rm manuscript/deeptools/distance_to_centromeres*
# printf "1\t14511722\t14803970\n2\t3611839\t3633423\n3\t13589757\t13867121\n4\t3133664\t3133674\n5\t11194538\t11723210\n" > annotations/centromere_positions.bed
# for chr in 1 2 3 4 5
# do
	# cs=$(awk -v c=$chr '$1==c {print $2}' annotations/centromere_positions.bed)
	# ce=$(awk -v c=$chr '$1==c {print $3}' annotations/centromere_positions.bed)
	# max=$(awk -v c=$chr '$1==c {print $2}' methyl/chromosome_sizes.txt)
	# cat manuscript/deeptools/regions_protein_coding_genes.txt | awk -v OFS="\t" -v c=$chr -v s=$cs -v e=$ce -v m=$max '$1==c {if ($3<s) d=((s-$3)/(s-1))*100; else if ($2>e) d=(($2-e)/(m-e))*100; else d=0; print $1,$2,$3,d}'  > manuscript/deeptools/distance_to_centromeres_${chr}.bedGraph
# done
# cat manuscript/deeptools/distance_to_centromeres_*.bedGraph | sort -k1,1n -k2,2n | bedtools merge -c 4 -o min -i - > manuscript/deeptools/distance_to_centromeres.bedGraph

# bedGraphToBigWig manuscript/deeptools/distance_to_centromeres.bedGraph methyl/chromosome_sizes.txt manuscript/deeptools/distance_to_centromeres.bw
# printf "computing matrix part3 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_protein_coding_genes.txt -S manuscript/deeptools/distance_to_centromeres.bw manuscript/deeptools/distance_to_centromeres.bw -bs 10 -b 0 -a 0 -m 300 -p 10 -o manuscript/deeptools/distance_to_centro_genes.gz
# printf "plotting heatmap part3 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/distance_to_centro_genes.gz -out manuscript/paper_plots/heatmap_distance_to_centro_genes.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' 'gray' --startLabel "" --endLabel "" --xAxisLabel "Gene" --samplesLabel "Distance to centromere" "" --regionsLabel ${clustername} --whatToShow "heatmap and colorbar" --zMin 0 0 --zMax 100 100


# ######### To remove TE already present in a cluster (common > MBD2_5_6 > MBD1 > MBD5_6 > MBD5_6_SUVH1_SUVH3)

# rm manuscript/deeptools/*_TEs.bed
# rm manuscript/deeptools/*temp.txt

# cat manuscript/annotations/all_TEs.bed manuscript/annotations/all_TE_genes.bed | sort -k1,1n -k2,2n > manuscript/annotations/all_TEs_and_TE_genes.bed

# ### To get TE / TE-gene ID intermediate files for each group of peaks
# for group in common_peaks MBD1_peaks MBD2_5_6_peaks MBD5_6_SUVH1_3_peaks MBD5_6_peaks
# do
	# awk -v OFS="\t" -v g=$group '$1 == g && $3 ~ "TE" {print $1,$4}' manuscript/deeptools/annotated_grouped_peaks_AGIv2.txt | sort | uniq > manuscript/deeptools/${group}_TE_temp.txt
# done

# ### To get TE bed files of common peaks
# while read name AGI
# do
	# grep -w $AGI manuscript/annotations/all_TEs_and_TE_genes.bed >> manuscript/deeptools/common_peaks_TEs.bed
# done < manuscript/deeptools/common_peaks_TE_temp.txt

# ### To get TE bed files of MBD2/5/6 peaks (removing ones called in common peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD256\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_TEs_and_TE_genes.bed >> manuscript/deeptools/MBD2_5_6_peaks_TEs.bed
	# fi
# done < manuscript/deeptools/MBD2_5_6_peaks_TE_temp.txt

# ### To get TE bed files of MBD1 peaks (removing ones called in common and MBD2/5/6 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD1\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD1\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_TEs_and_TE_genes.bed >> manuscript/deeptools/MBD1_peaks_TEs.bed
	# fi
# done < manuscript/deeptools/MBD1_peaks_TE_temp.txt

# ### To get TE bed files of MBD5/6 peaks (removing ones called in common, MBD2/5/6 and MBD1 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD56\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD56\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD1_peaks_TEs.bed
	# then
		# printf "$AGI\tMBD56\tMBD1\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_TEs_and_TE_genes.bed >> manuscript/deeptools/MBD5_6_peaks_TEs.bed
	# fi
# done < manuscript/deeptools/MBD5_6_peaks_TE_temp.txt

# ### To get TE bed files of MBD5/6/SUVH1/3 peaks (removing ones called in common, MBD2/5/6, MBD1 and MBD5/6 peaks)
# while read name AGI
# do
	# if grep -qw $AGI manuscript/deeptools/common_peaks_TEs.bed
	# then
		# printf "$AGI\tSUVH13\tcommon\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD2_5_6_peaks_TEs.bed
	# then
		# printf "$AGI\tSUVH13\tMBD256\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD1_peaks_TEs.bed
	# then
		# printf "$AGI\tSUVH13\tMBD1\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# elif grep -qw $AGI manuscript/deeptools/MBD5_6_peaks_TEs.bed
	# then
		# printf "$AGI\tSUVH13\tMBD56\n" >> manuscript/deeptools/two_groups_common_peaks_temp.txt
	# else
		# grep -w $AGI manuscript/annotations/all_TEs_and_TE_genes.bed >> manuscript/deeptools/MBD5_6_SUVH1_3_peaks_TEs.bed
	# fi
# done < manuscript/deeptools/MBD5_6_SUVH1_3_peaks_TE_temp.txt

# rm manuscript/deeptools/*temp.txt
### Build the per-group labels ("<group>_peaks(<number of lines>)") that are later
### passed, word-split, to plotHeatmap --regionsLabel through $clustername.

#######################################
# Print one "name(count) " label per TE group BED file.
# name  = file name without directory and without the "_TEs.bed" suffix
# count = number of lines in the file (wc -l)
# Arguments: BED file paths (normally the expansion of *_peaks_TEs.bed)
# Outputs:   labels on stdout, each followed by a single space;
#            a warning on stderr for every path that is not a readable file
#            (e.g. the literal pattern left behind by an unmatched glob)
#######################################
te_group_labels() {
	local bed base count
	for bed in "$@"
	do
		if [[ ! -f "$bed" || ! -r "$bed" ]]
		then
			printf 'warning: skipping missing or unreadable TE group file: %s\n' "$bed" >&2
			continue
		fi
		base=${bed##*/}
		# Arithmetic expansion strips the padding some wc implementations add.
		count=$(( $(wc -l < "$bed") ))
		# The name is passed as data, never as the printf format string.
		printf '%s(%d) ' "${base%_TEs.bed}" "$count"
	done
}

clustername=$(te_group_labels manuscript/deeptools/*_peaks_TEs.bed)
if [[ -z "$clustername" ]]
then
	printf 'warning: no TE group labels built from manuscript/deeptools/*_peaks_TEs.bed\n' >&2
fi
# Overwriting (instead of rm + append) keeps the labels file in sync with
# $clustername and does not complain when the file does not exist yet.
printf '%s' "$clustername" > manuscript/deeptools/TEs_labels.txt

# computeMatrix reference-point --referencePoint "TSS" -R manuscript/deeptools/*_peaks_TEs.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/grouped_TEs_vf1.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_TEs_vf1.gz -out manuscript/paper_plots/heatmap_TEs_vf1.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --refPointLabel "" --xAxisLabel "TE" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_refpoint_TEs.txt

# # # #### getting mC heatmap

#######################################
# Compute the CG/CHG/CHH methylation matrix over the sorted TE regions
# (reference-point mode anchored on "TSS", -b 1000 / -a 6000, bin size 100)
# and plot it as a heatmap.
# plotHeatmap is only run when computeMatrix succeeds: the matrix path
# manuscript/deeptools/matrix_grouped_peaks_mC.gz is also the output of the
# (currently commented-out) protein-coding gene methylation step of this
# script, so after a failed computeMatrix a stale matrix from that step could
# otherwise be plotted as the TE heatmap.
# Arguments: region labels, one per group, forwarded to --regionsLabel
# Outputs:   progress messages on stdout, error message on stderr
# Returns:   1 if computeMatrix fails, otherwise the status of plotHeatmap
#######################################
plot_TE_mC_heatmap() {
	printf "computing methylation matrix for peaks groups\n"
	if ! computeMatrix reference-point --referencePoint "TSS" -R manuscript/deeptools/regions_refpoint_TEs.txt -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_mC.gz
	then
		printf 'error: computeMatrix failed for manuscript/deeptools/regions_refpoint_TEs.txt; not plotting heatmap_TEs_mC_vf1.pdf\n' >&2
		return 1
	fi
	printf "plotting methylation profiles for peaks groups\n"
	plotHeatmap -m manuscript/deeptools/matrix_grouped_peaks_mC.gz -out manuscript/paper_plots/heatmap_TEs_mC_vf1.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --refPointLabel "" --xAxisLabel "TE" --samplesLabel mCG mCHG mCHH --regionsLabel "$@" --zMin 0 --zMax 100 80 30 --whatToShow "heatmap and colorbar"
}

# $clustername is deliberately left unquoted: it holds space-separated labels
# and each one must reach --regionsLabel as a separate argument.
# shellcheck disable=SC2086
plot_TE_mC_heatmap ${clustername}

# #### getting heatmap with replicates and WT

# printf "computing matrix part1 for peaks groups in $clustername\n"
# computeMatrix reference-point --referencePoint "TSS" -R manuscript/deeptools/regions_refpoint_TEs.txt -S manuscript/MBD1_rep*.bw manuscript/MBD2_rep*.bw manuscript/MBD4_rep*.bw manuscript/MBD5_rep*.bw manuscript/MBD6_rep*.bw manuscript/SUVH1_rep*.bw manuscript/SUVH3_rep*.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/grouped_TEs_vf_rep.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_TEs_vf_rep.gz -out manuscript/paper_plots/heatmap_TEs_vf_rep.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'seismic' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --refPointLabel "" --xAxisLabel "TE" --samplesLabel MBD1_rep1 MBD1_rep2 MBD2_rep1 MBD2_rep2 MBD4_rep1 MBD4_rep2 MBD5_rep1 MBD5_rep2 MBD6_rep1 MBD6_rep2 SUVH1_rep1 SUVH1_rep2 SUVH3_rep1 SUVH3_rep2 WT RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar"


# #### getting distance to centromeres
# rm manuscript/deeptools/distance_to_centromeres*
# printf "1\t14511722\t14803970\n2\t3611839\t3633423\n3\t13589757\t13867121\n4\t3133664\t3133674\n5\t11194538\t11723210\n" > annotations/centromere_positions.bed
# for chr in 1 2 3 4 5
# do
	# cs=$(awk -v c=$chr '$1==c {print $2}' annotations/centromere_positions.bed)
	# ce=$(awk -v c=$chr '$1==c {print $3}' annotations/centromere_positions.bed)
	# max=$(awk -v c=$chr '$1==c {print $2}' methyl/chromosome_sizes.txt)
	# cat manuscript/deeptools/regions_refpoint_TEs.txt | awk -v OFS="\t" -v c=$chr -v s=$cs -v e=$ce -v m=$max '$1==c {if ($3<s) d=((s-$3)/(s-1))*100; else if ($2>e) d=(($2-e)/(m-e))*100; else d=0; print $1,$2,$3,d}'  > manuscript/deeptools/distance_to_centromeres_${chr}.bedGraph
# done
# cat manuscript/deeptools/distance_to_centromeres_*.bedGraph | sort -k1,1n -k2,2n | bedtools merge -c 4 -o min -i - > manuscript/deeptools/distance_to_centromeres.bedGraph

# bedGraphToBigWig manuscript/deeptools/distance_to_centromeres.bedGraph methyl/chromosome_sizes.txt manuscript/deeptools/distance_to_centromeres.bw
# printf "computing matrix part3 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_refpoint_TEs.txt -S manuscript/deeptools/distance_to_centromeres.bw manuscript/deeptools/distance_to_centromeres.bw -bs 10 -b 0 -a 0 -m 300 -p 10 -o manuscript/deeptools/distance_to_centro_TEs.gz
# printf "plotting heatmap part3 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/distance_to_centro_TEs.gz -out manuscript/paper_plots/heatmap_distance_to_centro_TEs.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' 'gray' --startLabel "" --endLabel "" --xAxisLabel "TE" --samplesLabel "Distance to centromere" "" --regionsLabel ${clustername} --zMin 0 0 --zMax 100 100 --whatToShow "heatmap and colorbar"






















# #### getting distance to centromeres
# rm manuscript/deeptools/distance_to_centromeres*
# printf "1\t14511722\t14803970\n2\t3611839\t3633423\n3\t13589757\t13867121\n4\t3133664\t3133674\n5\t11194538\t11723210\n" > annotations/centromere_positions.bed
# for chr in 1 2 3 4 5
# do
	# cs=$(awk -v c=$chr '$1==c {print $2}' annotations/centromere_positions.bed)
	# ce=$(awk -v c=$chr '$1==c {print $3}' annotations/centromere_positions.bed)
	# max=$(awk -v c=$chr '$1==c {print $2}' methyl/chromosome_sizes.txt)
	# cat manuscript/deeptools/regions_grouped_peaks.bed manuscript/deeptools/nal_random_peaks.bed | awk -v OFS="\t" -v c=$chr -v s=$cs -v e=$ce -v m=$max '$1==c {if ($3<s) d=((s-$3)/(s-1))*100; else if ($2>e) d=(($2-e)/(m-e))*100; else d=0; print $1,$2,$3,d}'  > manuscript/deeptools/distance_to_centromeres_${chr}.bedGraph
# done
# cat manuscript/deeptools/distance_to_centromeres_*.bedGraph | sort -k1,1n -k2,2n > manuscript/deeptools/distance_to_centromeres.bedGraph

# printf "Group\tPeak_ID\tDistance\tChr\tStart\tEnd\n" > manuscript/deeptools/distance_to_centromeres_grouped.txt
# for reg in random common MBD1 MBD4 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
# do
	# bedtools intersect -a manuscript/deeptools/distance_to_centromeres.bedGraph -b manuscript/deeptools/*al_${reg}_peaks.bed -wa -f 1 -r | awk -v OFS="\t" -v r=$reg '{print r,r"_peaks_"NR,$4,$1,$2,$3}' >> manuscript/deeptools/distance_to_centromeres_grouped.txt
# done

##### plotting final heatmap of Distance to centromere over the 6 clusters

# bedGraphToBigWig manuscript/deeptools/distance_to_centromeres.bedGraph methyl/chromosome_sizes.txt manuscript/deeptools/distance_to_centromeres.bw
# printf "computing matrix part3 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_grouped_peaks.bed -S manuscript/deeptools/distance_to_centromeres.bw -bs 10 -b 0 -a 0 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf3.gz
# printf "plotting heatmap part3 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf3.gz -out manuscript/plots/heatmap_grouped_vf3.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel "Distance to centromere" --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"


# ######## To plot different profiles over the 6 clusters

# clustername=$(cat manuscript/deeptools/regions_labels.txt)

# printf "computing regions matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_chip.gz
# printf "plotting regions profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_chip.gz -out manuscript/paper_plots/profile_grouped_peaks_chip.pdf --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --plotType 'lines' --averageType 'mean' --startLabel "Peak" --endLabel "" --yAxisLabel "ChIP" --perGroup --regionsLabel $clustername

# for context in CG CHG CHH
# do
	# case $context in
		# CG) ymax=100
			# ymin=-1
			# ylab="mCG(%)";;
		# CHG) ymax=50
			# ymin=-0.5
			# ylab="mCHG(%)";;
		# CHH) ymax=20
			# ymin=-0.2
			# ylab="mCHH(%)";;
	# esac
	# printf "computing $context methylation matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${context}.gz
	# printf "plotting $context methylation profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${context}.gz -out manuscript/paper_plots/profile_grouped_peaks_${context}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --perGroup --yMin 0 --yMax $ymax
# done
	
# printf "computing rna matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_rna.gz
# printf "plotting rna profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_rna.gz -out manuscript/paper_plots/profile_grouped_peaks_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA (median)" --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing $mark matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${mark}.gz
	# printf "plotting $mark profile for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/paper_plots/profile_grouped_peaks_${mark}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --outFileNameData manuscript/plots/profile_grouped_peaks_${mark}_data.txt
	# printf "plotting $mark heatmap for peaks groups\n"
	# plotHeatmap -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/paper_plots/heatmap_grouped_peaks_${mark}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"
# done

# printf "computing CG density matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S annotations/CG_density.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz --missingDataAsZero
# printf "plotting CG density profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz -out manuscript/paper_plots/profile_grouped_peaks_cgdensity.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel "CG density" --yAxisLabel "CG density" --perGroup




























###############################################################################################
################### Code for clustering all peaks
###############################################################################################

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	order=1
				# clusters=2;;
		# MBD2) 	order=3
				# clusters=4;;
		# MBD4) 	order=2
				# clusters=2;;
		# MBD5) 	order=4
				# clusters=3;;
		# MBD6) 	order=5
				# clusters=3;;
		# SUVH1) 	order=6
				# clusters=2;;
		# SUVH3) 	order=7
				# clusters=2;;
	# esac
	# printf "computing matrix for $sample peaks\n"
	#### pass1 (test different cluster sizes)
	# computeMatrix scale-regions -R manuscript/peaks_${sample}.bed -S manuscript/MBD1.bw manuscript/MBD4.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/peaks_${sample}.gz
	# for clusters in 1 2 3 4 5
	# do
		# printf "plotting heatmap with $clusters clusters for $sample\n"
		# plotHeatmap -m manuscript/deeptools/peaks_${sample}.gz -out manuscript/plots/heatmap_peaks_${sample}_k${clusters}.pdf --sortRegions descend --sortUsingSamples $order --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --kmeans $clusters --startLabel "Peak start" --endLabel "Peak end" --samplesLabel MBD1 MBD4 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --outFileSortedRegions manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt
	# done
# done

#### making genes and TEs heatmaps and profiles

# bedtools merge -i annotations/all_genes.bed | awk -v OFS="\t" '{print $1,$2,$3,"1"}' > manuscript/temp_genes.bed
# bedtools complement -i manuscript/temp_genes.bed -g manuscript/tair10_nuc.txt | awk '{print $1,$2,$3,"0"}' > manuscript/temp2_genes.bed
# cat manuscript/temp_genes.bed manuscript/temp2_genes.bed | sort -k1,1n -k2,2n > manuscript/all_genes.bedGraph

# bedtools merge -i annotations/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,"1"}' > manuscript/temp_TEs.bed
# bedtools complement -i manuscript/temp_TEs.bed -g manuscript/tair10_nuc.txt | awk '{print $1,$2,$3,"0"}' > manuscript/temp2_TEs.bed
# cat manuscript/temp_TEs.bed manuscript/temp2_TEs.bed | sort -k1,1n -k2,2n > manuscript/all_TEs.bedGraph

# bedGraphToBigWig manuscript/all_genes.bedGraph manuscript/tair10_nuc.txt manuscript/all_genes.bw
# bedGraphToBigWig manuscript/all_TEs.bedGraph manuscript/tair10_nuc.txt manuscript/all_TEs.bw

# rm manuscript/temp*

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	clusters=2;;
		# MBD2) 	clusters=4;;
		# MBD4) 	clusters=2;;
		# MBD5) 	clusters=3;;
		# MBD6) 	clusters=3;;
		# SUVH1) 	clusters=2;;
		# SUVH3) 	clusters=2;;
	# esac
	# rm manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/${sample}_clusters${clusters}_size.txt)
	# printf "computing genes and TEs matrix for $sample peaks\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/genes_TEs_${sample}.gz
	# printf "plotting genes and TEs heatmap for $sample peaks\n"
	# plotHeatmap -m manuscript/deeptools/genes_TEs_${sample}.gz -out manuscript/plots/heatmap_genes_TEs_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0 --colorMap 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel Genes TEs --regionsLabel ${clustername} --zMin 0 0 --zMax 1 1 --whatToShow "heatmap"
# done	



# rm manuscript/deeptools/temp*
# ### MBD1 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_cluster2.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# ### MBD2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_4" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD4 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD4_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# ### MBD5 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD6 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# ### SUVH1 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH1_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### SUVH3 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH3_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### MBD1_2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed

# for file in manuscript/deeptools/temp_*.bed
# do
	# namext=${file##*/temp_}
	# name=${namext%_peaks.bed}
	# sort -k1,1n -k2,2n $file > manuscript/deeptools/temp2_${namext}
	# bedtools merge -d 100 -i manuscript/deeptools/temp2_${namext} > manuscript/deeptools/temp3_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp3_common_peaks.bed > manuscript/deeptools/final_common_peaks.bed
# rm manuscript/deeptools/temp3_common_peaks.bed

# for file in manuscript/deeptools/temp3_*.bed
# do
	# namext=${file##*/temp3_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_common_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp4_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp4_MBD5_6_SUVH1_3_peaks.bed > manuscript/deeptools/final_MBD5_6_SUVH1_3_peaks.bed
# rm manuscript/deeptools/temp4_MBD5_6_SUVH1_3_peaks.bed

# for file in manuscript/deeptools/temp4_*.bed
# do
	# namext=${file##*/temp4_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD5_6_SUVH1_3_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp5_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp5_MBD2_5_6_peaks.bed > manuscript/deeptools/final_MBD2_5_6_peaks.bed
# rm manuscript/deeptools/temp5_MBD2_5_6_peaks.bed

# for file in manuscript/deeptools/temp5_*.bed
# do
	# namext=${file##*/temp5_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD2_5_6_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp6_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp6_MBD5_6_peaks.bed > manuscript/deeptools/final_MBD5_6_peaks.bed
# rm manuscript/deeptools/temp6_MBD5_6_peaks.bed

# for file in manuscript/deeptools/temp6_*.bed
# do
	# namext=${file##*/temp6_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD5_6_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp7_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp7_MBD1_peaks.bed > manuscript/deeptools/final_MBD1_peaks.bed
# rm manuscript/deeptools/temp*

# #####################################################################################################################################################################################
# # # ### To check that final regions are not overlapping

# rm manuscript/overlapping_cluster_regions.txt
# for reg1 in common MBD1 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
# do
	# for reg2 in common MBD1 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
	# do
		# bedtools intersect -a manuscript/deeptools/final_${reg1}_peaks.bed -b manuscript/deeptools/final_${reg2}_peaks.bed | wc -l | awk -v OFS="\t" -v r1=$reg1 -v r2=$reg2 '{print r1,r2,$1}' >> manuscript/overlapping_cluster_regions.txt
	# done
# done
# cat manuscript/overlapping_cluster_regions.txt

# #####################################################################################################################################################################################

### To split common cluster based on MBD4

# ### pass1
# printf "computing matrix for common peaks pass1\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_common_peaks.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks.gz
# printf "plotting heatmap for common peaks pass1\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks.gz -out manuscript/plots/heatmap_common_pass1_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass1_k2.bed --kmeans 2

# ### pass2
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass1_k2.bed > manuscript/deeptools/regions_common_pass2.bed
# printf "computing matrix for common peaks pass2\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass2.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass2.gz
# printf "plotting heatmap for common peaks pass2\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass2.gz -out manuscript/plots/heatmap_common_pass2_k4.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass2_k4.bed --kmeans 4

# ### pass3
# awk -v OFS="\t" '$13=="cluster_4"' manuscript/deeptools/regions_common_pass2_k4.bed > manuscript/deeptools/regions_common_pass3.bed
# printf "computing matrix for common peaks pass3\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass3.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass3.gz
# printf "plotting heatmap for common peaks pass3\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass3.gz -out manuscript/plots/heatmap_common_pass3_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass3_k2.bed --kmeans 2

# ### pass4
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass3_k2.bed | sort -k1,1n -k2,2n > manuscript/deeptools/regions_common_pass4.bed
# printf "computing matrix for common peaks pass4\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass4.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass4.gz
# printf "plotting heatmap for common peaks pass4\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass4.gz -out manuscript/plots/heatmap_common_pass4_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 --zMin -1.5 --zMax 1.5 --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_common_pass4_k2.bed

# ### final check
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass4_k2.bed | sort -k1,1n -k2,2n > manuscript/deeptools/final2_MBD4_peaks.bed
# bedtools intersect -v -f 1 -r -a manuscript/deeptools/final_common_peaks.bed -b manuscript/deeptools/final2_MBD4_peaks.bed > manuscript/deeptools/final2_common_peaks.bed

# printf "computing matrix for peaks groups in common and MBD4\n"
# computeMatrix scale-regions -R manuscript/deeptools/final2_common_peaks.bed manuscript/deeptools/final2_MBD4_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_MBD4_peaks.gz
# printf "plotting heatmap for peaks common and MBD4\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_MBD4_peaks.gz -out manuscript/plots/heatmap_common_MBD4.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel "Common" "MBD4" --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar"

# mv manuscript/deeptools/final2_common_peaks.bed manuscript/deeptools/final_common_peaks.bed
# mv manuscript/deeptools/final2_MBD4_peaks.bed manuscript/deeptools/final_MBD4_peaks.bed




###############################################################################################

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	order=1
				# clusters=2;;
		# MBD2) 	order=3
				# clusters=4;;
		# MBD4) 	order=2
				# clusters=2;;
		# MBD5) 	order=4
				# clusters=3;;
		# MBD6) 	order=5
				# clusters=3;;
		# SUVH1) 	order=6
				# clusters=2;;
		# SUVH3) 	order=7
				# clusters=2;;
	# esac
	# printf "computing matrix for $sample peaks\n"
	#### pass1 (test different cluster sizes)
	# computeMatrix scale-regions -R manuscript/peaks_${sample}.bed -S manuscript/MBD1.bw manuscript/MBD4.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/peaks_${sample}.gz
	# for clusters in 1 2 3 4 5
	# do
		# printf "plotting heatmap with $clusters clusters for $sample\n"
		# plotHeatmap -m manuscript/deeptools/peaks_${sample}.gz -out manuscript/plots/heatmap_peaks_${sample}_k${clusters}.pdf --sortRegions descend --sortUsingSamples $order --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --kmeans $clusters --startLabel "Peak start" --endLabel "Peak end" --samplesLabel MBD1 MBD4 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --outFileSortedRegions manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt
	# done
# done

#### making final ChIP heatmaps and methylation heatmaps and profile

# for metsample in WT_seedlings MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 SUVH1_SUVH3 MBD1_MBD2 MBD5_MBD6 MBD1_MBD4 mbd1_2_4 mbd2_5_6 mbd1_2_5_6
# do
	# echo "$metsample mC"
	# zcat /home/jcahn/working_data/Runs/Map/At_MBDs/BS_seeker2/Hub/mm2_end_${metsample}.CGmap.gz | awk -v OFS="\t" '$1 ~ /[1-9]$/ {print $1,$3-1,$3,$6*100}' > methyl/mC_${metsample}.bedGraph
	# echo "making bigWig"
	# bedGraphToBigWig methyl/mC_${metsample}.bedGraph methyl/chromosome_sizes.txt methyl/mC_${metsample}.bw
# done
# rm methyl/*.bedGraph
# rm /home/jcahn/working_data/Runs/Map/At_MBDs/BS_seeker2/Hub/*

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	mutants=(WT MBD1 MBD1_MBD2 MBD1_MBD4 mbd1_2_4 mbd1_2_5_6)
				# mutlabels=(WT mbd1 mbd1/2 mbd1/4 mbd1/2/4 mbd1/2/5/6)
				# clusters=2;;
		# MBD2) 	mutants=(WT MBD2 MBD1_MBD2 mbd1_2_4 mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd2 mbd1/2 mbd1/2/4 mbd2/5/6 mbd1/2/5/6)
				# clusters=4;;
		# MBD4) 	mutants=(WT MBD4 MBD1_MBD4 mbd1_2_4)
				# mutlabels=(WT mbd4 mbd1/4 mbd1/2/4)
				# clusters=2;;
		# MBD5) 	mutants=(WT MBD5 MBD5_MBD6 mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd5 mbd5/6 mbd2/5/6 mbd1/2/5/6)
				# clusters=3;;
		# MBD6) 	mutants=(WT MBD6 MBD5_MBD6 mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd6 mbd5/6 mbd2/5/6 mbd1/2/5/6)
				# clusters=3;;
		# SUVH1) 	mutants=(WT SUVH1 SUVH1_SUVH3)
				# mutlabels=(WT suvh1 suvh1/3)
				# clusters=2;;
		# SUVH3) 	mutants=(WT SUVH3 SUVH1_SUVH3)
				# mutlabels=(WT suvh3 suvh1/3)
				# clusters=2;;
	# esac
	# len=${#mutants[@]}
	# rm manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/${sample}_clusters${clusters}_size.txt)
	# ### pass2 (rebuild matrix and heatmap on chosen cluster number)
	# printf "computing final matrix for $sample peaks\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S manuscript/MBD1.bw manuscript/MBD4.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/peaks_vf_${sample}.gz
	# printf "plotting final heatmap for $sample peaks\n"
	# plotHeatmap -m manuscript/deeptools/peaks_vf_${sample}.gz -out manuscript/plots/heatmap_peaks_vf_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD4 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"
	# for context in CG CHG CHH
	# do
		# rm manuscript/deeptools/${sample}_${context}_mutantlist.txt
		# for (( i=0; i<$len; i++ )) 
		# do 
			# if [[ ${mutants[$i]} == "WT" ]]
			# then
				# name="WT_seedlings"
			# else
				# name=${mutants[$i]}
			# fi
			# printf "methyl/${context}_${name}.bw " >> manuscript/deeptools/${sample}_${context}_mutantlist.txt
		# done
		# mutantlist=$(cat manuscript/deeptools/${sample}_${context}_mutantlist.txt)
		# printf "computing $context methylation matrix for $sample peaks in $clusters clusters\nmutantlist: ${mutants[@]}\nfiles: $mutantlist\n"
		# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S ${mutantlist} -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/methylation_${sample}_${context}_k${clusters}.gz --sortRegions keep
		# printf "plotting $context methylation heatmap for $sample\n"
		# plotHeatmap -m manuscript/deeptools/methylation_${sample}_${context}_k${clusters}.gz -out manuscript/plots/heatmap_methylation_${sample}_${context}_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel ${mutlabels[@]} --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"
		# printf "plotting $context methylation profile for $sample\n" 
		# plotProfile -m manuscript/deeptools/methylation_${sample}_${context}_k${clusters}.gz -out manuscript/plots/profile_methylation_${sample}_${context}_k${clusters}.pdf --plotType 'lines' --averageType 'mean' --perGroup --startLabel "Peak" --endLabel "" --samplesLabel ${mutlabels[@]} --regionsLabel ${clustername} --yAxisLabel "m${context} (%)"
	# done
# done

#### making genes and TEs heatmaps and profiles

# bedtools merge -i annotations/all_genes.bed | awk -v OFS="\t" '{print $1,$2,$3,"1"}' > manuscript/temp_genes.bed
# bedtools complement -i manuscript/temp_genes.bed -g manuscript/tair10_nuc.txt | awk '{print $1,$2,$3,"0"}' > manuscript/temp2_genes.bed
# cat manuscript/temp_genes.bed manuscript/temp2_genes.bed | sort -k1,1n -k2,2n > manuscript/all_genes.bedGraph

# bedtools merge -i annotations/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,"1"}' > manuscript/temp_TEs.bed
# bedtools complement -i manuscript/temp_TEs.bed -g manuscript/tair10_nuc.txt | awk '{print $1,$2,$3,"0"}' > manuscript/temp2_TEs.bed
# cat manuscript/temp_TEs.bed manuscript/temp2_TEs.bed | sort -k1,1n -k2,2n > manuscript/all_TEs.bedGraph

# bedGraphToBigWig manuscript/all_genes.bedGraph manuscript/tair10_nuc.txt manuscript/all_genes.bw
# bedGraphToBigWig manuscript/all_TEs.bedGraph manuscript/tair10_nuc.txt manuscript/all_TEs.bw

# rm manuscript/temp*

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	clusters=2;;
		# MBD2) 	clusters=4;;
		# MBD4) 	clusters=2;;
		# MBD5) 	clusters=3;;
		# MBD6) 	clusters=3;;
		# SUVH1) 	clusters=2;;
		# SUVH3) 	clusters=2;;
	# esac
	# rm manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/${sample}_clusters${clusters}_size.txt)
	# printf "computing genes and TEs matrix for $sample peaks\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/genes_TEs_${sample}.gz
	# printf "plotting genes and TEs heatmap for $sample peaks\n"
	# plotHeatmap -m manuscript/deeptools/genes_TEs_${sample}.gz -out manuscript/plots/heatmap_genes_TEs_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0 --colorMap 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel Genes TEs --regionsLabel ${clustername} --zMin 0 0 --zMax 1 1 --whatToShow "heatmap"
# done	


# #### making complete heatmaps

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	clusters=2;;
		# MBD2) 	clusters=4;;
		# MBD4) 	clusters=2;;
		# MBD5) 	clusters=3;;
		# MBD6) 	clusters=3;;
		# SUVH1) 	clusters=2;;
		# SUVH3) 	clusters=2;;
	# esac
	# rm manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/${sample}_clusters${clusters}_size.txt)
	# ### with each mC context individually
	# # printf "computing complete matrix for $sample peaks\n"
	# # computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S manuscript/MBD1.bw manuscript/MBD4.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/complete_${sample}.gz
	# # printf "plotting complete heatmap for $sample peaks\n"
	# # plotHeatmap -m manuscript/deeptools/complete_${sample}.gz -out manuscript/plots/heatmap_complete_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'Oranges' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bicubic --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD4 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mCG mCHG mCHH RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 80 40 1.5 1 1 --whatToShow "heatmap and colorbar"
	# ### with mC and mCH contexts
	# printf "computing complete matrix for $sample peaks\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S manuscript/MBD1.bw manuscript/MBD4.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw methyl/mCH_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/complete_${sample}.gz
	# printf "plotting complete heatmap for $sample peaks\n"
	# plotHeatmap -m manuscript/deeptools/complete_${sample}.gz -out manuscript/plots/heatmap_complete_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD4 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC mCH RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 80 1 1 1 --whatToShow "heatmap and colorbar"
# done
	
#### making rna heatmaps and profiles

# for sample in mbd1_2_4 mbd1_2_5_6 mbd2_5_6 suvh1_3
# do
	# printf "creating bw DEG files for $sample\n"
	# rm manuscript/temp_DEG_${sample}.bed
	# while read AGI
	# do
		# grep $AGI rna/allDEG.bed | awk -v OFS="\t" '{print $1,$2,$3,"-1"}' >> manuscript/temp_DEG_${sample}.bed
	# done < rna/list_DOWN_DEG_${sample}.txt
	# while read AGI
	# do
		# grep $AGI rna/allDEG.bed | awk -v OFS="\t" '{print $1,$2,$3,"1"}' >> manuscript/temp_DEG_${sample}.bed
	# done < rna/list_UP_DEG_${sample}.txt
	# sort -k1,1n -k2,2n manuscript/temp_DEG_${sample}.bed > manuscript/temp2_DEG_${sample}.bed
	# bedtools merge -i manuscript/temp2_DEG_${sample}.bed -c 4 -o distinct | awk -v OFS="\t" -F"[,]" '{print $1,$2,$3,$4}' | awk -v OFS="\t" '{print $1,$2,$3,$4}' > manuscript/temp3_DEG_${sample}.bed
	# bedtools complement -i manuscript/temp3_DEG_${sample}.bed -g manuscript/tair10_nuc.txt | awk '{print $1,$2,$3,"0"}' > manuscript/temp4_DEG_${sample}.bed
	# cat manuscript/temp3_DEG_${sample}.bed manuscript/temp4_DEG_${sample}.bed | sort -k1,1n -k2,2n > manuscript/DEG_${sample}.bedGraph
	# bedGraphToBigWig manuscript/DEG_${sample}.bedGraph manuscript/tair10_nuc.txt manuscript/DEG_${sample}.bw
# done

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "$sample" in
		# MBD1) 	mutants=(WT mbd1_2_4 mbd1_2_5_6)
				# mutlabels=(WT mbd1/2/4 mbd1/2/5/6 DEG_mbd1/2/4 DEG_mbd1/2/5/6)
				# mutlabelsprofile=(WT mbd1/2/4 mbd1/2/5/6)
				# min=(0 0 0 -1 -1)
				# max=(1.5 1.5 1.5 1 1)
				# colors=("viridis" "viridis" "viridis" "coolwarm" "coolwarm")
				# clusters=2;;
		# MBD2) 	mutants=(WT mbd1_2_4 mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd1/2/4 mbd2/5/6 mbd1/2/5/6 DEG_mbd1/2/4 DEG_mbd2/5/6 DEG_mbd1/2/5/6)
				# mutlabelsprofile=(WT mbd1/2/4 mbd2/5/6 mbd1/2/5/6)
				# min=(0 0 0 0 -1 -1 -1)
				# max=(1.5 1.5 1.5 1.5 1 1 1)
				# colors=("viridis" "viridis" "viridis" "viridis" "coolwarm" "coolwarm" "coolwarm")
				# clusters=4;;
		# MBD4) 	mutants=(WT mbd1_2_4)
				# mutlabels=(WT mbd1/2/4 DEG_mbd1/2/4)
				# mutlabelsprofile=(WT mbd1/2/4)
				# min=(0 0 -1)
				# max=(1.5 1.5 1)
				# colors=("viridis" "viridis" "coolwarm")
				# clusters=2;;
		# MBD5) 	mutants=(WT mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd2/5/6 mbd1/2/5/6 DEG_mbd2/5/6 DEG_mbd1/2/5/6)
				# mutlabelsprofile=(WT mbd2/5/6 mbd1/2/5/6)
				# min=(0 0 0 -1 -1)
				# max=(1.5 1.5 1.5 1 1)
				# colors=("viridis" "viridis" "viridis" "coolwarm" "coolwarm")
				# clusters=3;;
		# MBD6) 	mutants=(WT mbd2_5_6 mbd1_2_5_6)
				# mutlabels=(WT mbd2/5/6 mbd1/2/5/6 DEG_mbd2/5/6 DEG_mbd1/2/5/6)
				# mutlabelsprofile=(WT mbd2/5/6 mbd1/2/5/6)
				# min=(0 0 0 -1 -1)
				# max=(1.5 1.5 1.5 1 1)
				# colors=("viridis" "viridis" "viridis" "coolwarm" "coolwarm")
				# clusters=3;;
		# SUVH1) 	mutants=(WT suvh1_3)
				# mutlabels=(WT suvh1/3 DEG_suvh1/3)
				# mutlabelsprofile=(WT suvh1/3)
				# min=(0 0 -1)
				# max=(1.5 1.5 1)
				# colors=("viridis" "viridis" "coolwarm")
				# clusters=2;;
		# SUVH3) 	mutants=(WT suvh1_3)
				# mutlabels=(WT suvh1/3 DEG_suvh1/3)
				# mutlabelsprofile=(WT suvh1/3)
				# min=(0 0 -1)
				# max=(1.5 1.5 1)
				# colors=("viridis" "viridis" "coolwarm")
				# clusters=2;;
	# esac
	# len=${#mutants[@]}
	# rm manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/${sample}_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/${sample}_clusters${clusters}_size.txt)
	# rm manuscript/deeptools/${sample}_mutantlist.txt
	# rm manuscript/deeptools/${sample}_DEGlist.txt
	# for (( i=0; i<$len; i++ )) 
	# do 
		# printf "rna/${mutants[$i]}_a.bw " >> manuscript/deeptools/${sample}_mutantlist.txt
		# if [[ ${mutants[$i]} != "WT" ]]
		# then
			# printf "manuscript/DEG_${mutants[$i]}.bw " >> manuscript/deeptools/${sample}_DEGlist.txt
		# fi
	# done
	# mutantlist=$(cat manuscript/deeptools/${sample}_mutantlist.txt manuscript/deeptools/${sample}_DEGlist.txt)
	# mutantlistprofile=$(cat manuscript/deeptools/${sample}_mutantlist.txt)
	# printf "computing rna matrix for $sample peaks in $clusters clusters\nmutantlist: ${mutants[@]}\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S ${mutantlist} -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/rna_${sample}_k${clusters}.gz --sortRegions keep
	# printf "plotting rna heatmap for $sample\n"
	# plotHeatmap -m manuscript/deeptools/rna_${sample}_k${clusters}.gz -out manuscript/plots/heatmap_rna_${sample}_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap ${colors[@]} --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel ${mutlabels[@]} --regionsLabel ${clustername} --whatToShow "heatmap and colorbar" --zMin ${min[@]} --zMax ${max[@]}
	# printf "computing rna matrix for profile $sample peaks in $clusters clusters\nmutantlist: ${mutants[@]}\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_peaks_${sample}_k${clusters}.txt -S ${mutantlistprofile} -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/rna_profile_${sample}_k${clusters}.gz --sortRegions keep
	# # printf "plotting rna profile for $sample\n" 
	# # plotProfile -m manuscript/deeptools/rna_profile_${sample}_k${clusters}.gz -out manuscript/plots/profile_rna_${sample}_k${clusters}.pdf --plotType 'lines' --averageType 'median' --startLabel "Peak" --endLabel "" --samplesLabel ${mutlabelsprofile[@]} --regionsLabel ${clustername} --yAxisLabel "normalized RNA expression"
	# printf "plotting rna profile per group for $sample\n" 
	# plotProfile -m manuscript/deeptools/rna_profile_${sample}_k${clusters}.gz -out manuscript/plots/profile_rna_${sample}_k${clusters}.pdf --plotType 'lines' --perGroup --averageType 'mean' --startLabel "Peak" --endLabel "" --samplesLabel ${mutlabelsprofile[@]} --regionsLabel ${clustername} --yAxisLabel "normalized RNA expression"
# done


# #### To merge all the peaks and plot matrix and heatmaps on the clusters

# #### merging peaks in similar profile clusters from individual peak kmeans clustering

# ## intermediate matrix to extract MBD1 and MBD1+MBD2 peaks from MBD1 cluster2
# printf "compute matrix MBD1 cluster2\n"
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_k2.txt > manuscript/deeptools/temp_MBD1_MBD2_peaks.bed
# computeMatrix scale-regions -R manuscript/deeptools/temp_MBD1_MBD2_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/temp_MBD1_cluster2.gz
# printf "plot heatmap MBD1 cluster2\n"
# plotHeatmap -m manuscript/deeptools/temp_MBD1_cluster2.gz -out manuscript/plots/heatmap_MBD1_cluster2.pdf --sortRegions descend --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 Genes TEs --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_peaks_MBD1_cluster2.txt

# ## intermediate matrix to extract MBD5+MBD6 and MBD2+MBD5+MBD6 peaks from MBD6 cluster3
# printf "compute matrix MBD6 cluster3\n"
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt > manuscript/deeptools/temp_MBD2_MBD5_MBD6_peaks.bed
# computeMatrix scale-regions -R manuscript/deeptools/temp_MBD2_MBD5_MBD6_peaks.bed -S manuscript/MBD2.bw manuscript/MBD6.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/temp_MBD6_cluster3.gz
# printf "plot heatmap MBD6 cluster3\n"
# plotHeatmap -m manuscript/deeptools/temp_MBD6_cluster3.gz -out manuscript/plots/heatmap_MBD6_cluster3.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD2 MBD6 Genes TEs --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_peaks_MBD6_cluster3.txt

## # ## intermediate matrix to resplit MBD1 and MBD1+MBD2 peaks from xMBD1_2_peaks cluster

# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_cluster2.txt >> manuscript/deeptools/xtemp_MBD1_2_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/xtemp_MBD1_2_peaks.bed

# printf "compute matrix MBD1_2 cluster\n"
# computeMatrix scale-regions -R manuscript/deeptools/xtemp_MBD1_2_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/temp_MBD1_2_cluster.gz
# printf "plot heatmap xMBD1_2 cluster\n"
# plotHeatmap -m manuscript/deeptools/temp_MBD1_2_cluster.gz -out manuscript/plots/heatmap_MBD1_2_cluster.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 Genes TEs --kmeans 3 --outFileSortedRegions manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt


###################################################### 

# rm manuscript/deeptools/temp*.bed
# rm manuscript/deeptools/xtemp*.bed
# ### MBD1 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_cluster2.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# ### MBD2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_4" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD4 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD4_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# ### MBD5 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD6 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# ### SUVH1 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH1_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### SUVH3 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH3_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### MBD1_2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed

# ## Build one sorted + merged BED file per peak group (temp_<group>_peaks.bed -> grouped_vf_<group>_peaks.bed)
# ## and collect a "<group>(<number of merged regions>)" label for each of them in regions_labels.txt.
# ## The labels file is removed first because the loop only appends to it.
# rm manuscript/deeptools/regions_labels.txt
# for file in manuscript/deeptools/temp_*.bed
# do
	# ## namext: file name with everything up to "temp_" stripped (e.g. common_peaks.bed)
	# ## name: namext without the "_peaks.bed" suffix, used as the group label
	# namext=${file##*/temp_}
	# name=${namext%_peaks.bed}
	# ## sort by chromosome (numeric) then start before merging overlapping regions
	# sort -k1,1n -k2,2n $file > manuscript/deeptools/temp2_${namext}
	# bedtools merge -i manuscript/deeptools/temp2_${namext} > manuscript/deeptools/grouped_vf_${namext}
	# ## append "name(count) " on a single line; labels end up in the expansion order of the temp_*.bed glob
	# wc -l manuscript/deeptools/grouped_vf_${namext} | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/regions_labels.txt
# done
# ## space-separated list of labels, later passed unquoted to --regionsLabel (one word per group)
# clustername=$(cat manuscript/deeptools/regions_labels.txt)
		
# printf "computing complete matrix for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/grouped_vf_*.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf.gz
# printf "plotting complete heatmap for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf.gz -out manuscript/plots/heatmap_grouped_vf.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 11 12 --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar"

##### To intersect common peaks with annotations

# bedtools closest -a manuscript/deeptools/grouped_v2_common_peaks.bed -b annotations/all_annotations.bed -D b | awk -v OFS="\t" '$10<1000 && $10>-1000 {print $4,$5,$6,$7,$10,$9}' > manuscript/deeptools/closest_annot_to_common_peaks_info.bed
# bedtools closest -a manuscript/deeptools/grouped_v2_common_peaks.bed -b annotations/all_annotations.bed -D b -t first| awk -v OFS="\t" '$10<1000 && $10>-1000 {print $4,$5,$6}' | sort -k1,1n -k2,2n | uniq > manuscript/deeptools/closest_annot_to_common_peaks.bed
# head manuscript/deeptools/closest_annot_to_common_peaks.bed
# bedtools intersect -v -f 1 -r -wa -a annotations/all_annotations.bed -b manuscript/deeptools/closest_annot_to_common_peaks.bed > manuscript/deeptools/other_annot_not_common.bed
# printf "computing refpoint matrix\n"
# computeMatrix reference-point --referencePoint "TSS" -R manuscript/deeptools/closest_annot_to_common_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/matrix_common_annotations.gz
# printf "plotting refpoint heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_common_annotations.gz -out manuscript/plots/heatmap_common_annotations.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar"

#################### To plot profiles of all marks in the common and other annotations.
#################### Change the regions (-R) to manuscript/deeptools/closest_annot_to_common_peaks.bed manuscript/deeptools/other_annot_not_common.bed to look for differences between the annotations closest to common peaks and the others,
#################### or to manuscript/deeptools/grouped_v2_*.bed to look at the profiles around the different groups of peaks.
#################### When changing the regions, -a, -b, -m and the labels need to be changed accordingly.

# printf "computing regions matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/grouped_vf_*.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_chip.gz
# printf "plotting regions profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_chip.gz -out manuscript/plots/profile_grouped_peaks_chip.pdf --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --plotType 'lines' --averageType 'mean' --startLabel "Peak" --endLabel "" --yAxisLabel "ChIP" --perGroup --regionsLabel $clustername
# printf "plotting regions profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_chip.gz -out manuscript/plots/profile_grouped_peaks_chip_median.pdf --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --plotType 'lines' --averageType 'median' --startLabel "Peak" --endLabel "" --yAxisLabel "ChIP" --perGroup --regionsLabel $clustername

# ## For each cytosine context (CG, CHG, CHH), plot the per-group median methylation profile
# ## of WT and mutants around the grouped peaks. Uses $clustername set earlier for the region labels.
# for context in CG CHG CHH
# do
	# ## per-context plot settings: y-axis upper limit (ymax), lower limit (ymin) and axis label (ylab)
	# case $context in
		# CG) ymax=100
			# ymin=-1
			# ylab="mCG(%)";;
		# CHG) ymax=50
			# ymin=-0.5
			# ylab="mCHG(%)";;
		# CHH) ymax=20
			# ymin=-0.2
			# ylab="mCHH(%)";;
	# esac
	# ## The matrix computation and the mean profile below are disabled (double-commented):
	# ## the median plot that follows reads an already existing matrix_grouped_peaks_${context}.gz
	# # printf "computing $context methylation matrix for peaks groups\n"
	# # computeMatrix scale-regions -R manuscript/deeptools/grouped_v2_*.bed -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${context}.gz
	# # printf "plotting $context methylation profiles for peaks groups\n"
	# # plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${context}.gz -out manuscript/plots/profile_grouped_peaks_${context}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --perGroup --yMin 0 --yMax $ymax
	# ## median profile, one panel per peak group (--perGroup), y-axis from $ymin to $ymax
	# printf "plotting $context methylation profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${context}.gz -out manuscript/plots/profile_grouped_peaks_${context}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --perGroup --yMin $ymin --yMax $ymax
# done
	
# printf "computing rna matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/grouped_v2_*.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_rna.gz
# printf "plotting rna profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_rna.gz -out manuscript/plots/profile_grouped_peaks_rna.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "sum RNA replicates" --perGroup
# printf "plotting rna profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_rna.gz -out manuscript/plots/profile_grouped_peaks_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "sum RNA replicates" --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# # printf "computing $mark matrix for peaks groups\n"
	# # computeMatrix scale-regions -R manuscript/deeptools/grouped_v2_*.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${mark}.gz
	# printf "plotting $mark profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/plots/profile_grouped_peaks_${mark}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --yMin -0.5 --yMax 0.5
	# printf "plotting $mark profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/plots/profile_grouped_peaks_${mark}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --yMin -0.5 --yMax 0.5
# done

# printf "computing CG density matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/grouped_v2_*.bed -S annotations/CG_density.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz --missingDataAsZero
# printf "plotting CG density profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz -out manuscript/plots/profile_grouped_peaks_cgdensity.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel "CG density" --yAxisLabel "CG density" --perGroup
# printf "plotting CG density profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz -out manuscript/plots/profile_grouped_peaks_cgdensity_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel "CG density" --yAxisLabel "CG density" --perGroup

####  To get annotations closest to grouped peaks

# ## Summary table (Group / Distance / Annotation / Number) of the annotation types closest to each group of peaks.
# ## The annotation type is the part of column 4 of all_annotations.bed before the first ":".
# printf "Group\tDistance\tAnnotation\tNumber\n" > manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
# ## reference rows: count of each annotation type over the whole annotation file, written as group "All" / distance "Bound"
# awk '{print $4}' annotations/all_annotations.bed | awk -F":" '{print $1}' | sort | uniq -c | awk -v OFS="\t" '{print "All","Bound",$2,$1}' >> manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
# for file in manuscript/deeptools/grouped_vf_*.bed
# do
	# ## name: file name without the "grouped_vf_" prefix and the ".bed" suffix
	# namext=${file##*/grouped_vf_}
	# name=${namext%.bed}
	# ## keep columns 4,5,6,7,10,9 of the bedtools closest output, so that in the *_info.bed file $4 is the annotation name and $5 is former column 10
	# ## (presumably the distance reported by -d, assuming 3-column peaks and 6-column annotations -- TODO confirm)
	# bedtools closest -a $file -b annotations/all_annotations.bed -d | awk -v OFS="\t" '{print $4,$5,$6,$7,$10,$9}' > manuscript/deeptools/closest_annot_to_${name}_peaks_info.bed
	# ## count annotation types per distance bin
	# ## NOTE(review): the "$5<500" bin also matches $5==0, so "Bound" peaks are counted again in "Closest_500bp";
	# ## values of exactly 500 or exactly 1000 match none of the bins -- confirm whether this is intended
	# awk '$5==0 {print $4}' manuscript/deeptools/closest_annot_to_${name}_peaks_info.bed | awk -F":" '{print $1}' | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,"Bound",$2,$1}' >> manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
	# awk '$5<500 {print $4}' manuscript/deeptools/closest_annot_to_${name}_peaks_info.bed | awk -F":" '{print $1}' | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,"Closest_500bp",$2,$1}' >> manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
	# awk '$5>500 && $5<1000 {print $4}' manuscript/deeptools/closest_annot_to_${name}_peaks_info.bed | awk -F":" '{print $1}' | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,"Closest_1000bp",$2,$1}' >> manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
	# awk '$5>1000 {print $4}' manuscript/deeptools/closest_annot_to_${name}_peaks_info.bed | awk -F":" '{print $1}' | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,"Closest_over1000bp",$2,$1}' >> manuscript/deeptools/closest_annot_to_grouped_peaks_annotations.txt
# done

### Randomize (shuffle) the common peak file to see if some annotations are overrepresented

# awk -v OFS="\t" '$1~/^[1-9]/ {print $1,$3}' annotations/chromsize.bed > annotations/ath.genome
# bedtools shuffle -i manuscript/deeptools/grouped_vf_common_peaks.bed -g annotations/ath.genome > manuscript/deeptools/grouped_vf_random_peaks.bed

####  To annotate grouped peaks

# printf "Group\tAnnotation\tNumber\n" > manuscript/deeptools/annotated_grouped_peaks.txt
# for file in manuscript/deeptools/grouped_vf_*.bed
# do
	# namext=${file##*/grouped_vf_}
	# name=${namext%.bed}
	# printf "$name\n\n"
	# awk -v OFS="\t" -v n=$name '{print $1,$2,$3,n"_"NR,".","."}' $file > manuscript/deeptools/temp0_${name}.bed
	# annotatePeaks.pl manuscript/deeptools/temp0_${name}.bed tair10 -gff annotations/Araport11_GFF3_genes_transposons.201606.gff -annStats manuscript/deeptools/stats_${name}_peaks.txt > manuscript/deeptools/tempA_${name}.txt
	# awk -v OFS="\t" '(NR>1) && ($9=="Intergenic" || $9=="Chr") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="promoter-TSS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1b_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="TTS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1c_${name}.bed
	# awk -v OFS="\t" '(NR>1) && ($9=="exon" || $9=="3'"'"'" || $9=="5'"'"'") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1d_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="intron" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1e_${name}.bed
	# bedtools intersect -a manuscript/deeptools/temp1_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1b_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2b_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1c_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2c_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1d_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2d_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1e_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2e_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' manuscript/deeptools/temp2_${name}.txt > manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' manuscript/deeptools/temp2b_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' manuscript/deeptools/temp2c_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' manuscript/deeptools/temp2d_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' manuscript/deeptools/temp2e_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' manuscript/deeptools/temp3_${name}.txt > manuscript/deeptools/temp4_${name}.txt
	# sort -k1,1 -k2,2n manuscript/deeptools/temp4_${name}.txt | uniq | awk -v OFS="\t" '{print $4,$5}' | sort -k1,1 > manuscript/deeptools/annotated_peaks_${name}.txt
	# head manuscript/deeptools/annotated_peaks_${name}.txt
	# awk -v OFS="\t" '{print $2}' manuscript/deeptools/annotated_peaks_${name}.txt | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,$2,$1}' >> manuscript/deeptools/annotated_grouped_peaks.txt
# done
# rm manuscript/deeptools/temp*

####################################################################################################################################################

# #### merging all the peaks and then clustering

# cat manuscript/peaks_MBD1.bed manuscript/peaks_MBD2.bed manuscript/peaks_MBD4.bed manuscript/peaks_MBD5.bed manuscript/peaks_MBD6.bed manuscript/peaks_SUVH1.bed manuscript/peaks_SUVH3.bed | sort -k1,1n -k2,2n > manuscript/peals_all.bed
# bedtools merge -i manuscript/peals_all.bed | awk -v OFS="\t" '{print $1,$2,$3,"Peak_"NR}' > manuscript/peaks_merged.bed

# printf "computing complete matrix for all peaks (pass1)\n"
# computeMatrix scale-regions -R manuscript/peaks_merged.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/complete.gz
# ## For each tested number of clusters: (1) k-means cluster the complete matrix and save the clustered regions,
# ## (2) count the regions per cluster to build "cluster_i(count)" labels,
# ## (3) recompute the matrix on the clustered regions and replot the heatmap with these labels.
# ## NOTE(review): pass 2 writes its matrix to manuscript/deeptools/complete.gz, the same file as the pass-1 matrix computed before this loop,
# ## so from the second value of $clusters on, the k-means plot reads the matrix left by the previous iteration -- confirm this is intended.
# for clusters in 11 12 13 14
# do
	# printf "plotting complete heatmap for all peaks in $clusters clusters\n"
	# ## pass 1: k-means clustering; the clustered regions are saved to regions_complete_k${clusters}.txt
	# plotHeatmap -m manuscript/deeptools/complete.gz -out manuscript/plots/heatmap_complete_k${clusters}.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 3 6 --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar" --kmeans ${clusters} --outFileSortedRegions manuscript/deeptools/regions_complete_k${clusters}.txt
	# ## count the lines whose column 13 equals cluster_<i> and append "cluster_<i>(<count>)" to the size file
	# ## NOTE(review): the size file is only appended to and never removed here, so re-running this loop would add duplicate labels
	# for (( i=1; i<=$clusters; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v i=$clust '$13==i' manuscript/deeptools/regions_complete_k${clusters}.txt | wc -l | awk -v i=$i '{print "cluster_"i"("$1")"}' >> manuscript/deeptools/complete_clusters${clusters}_size.txt
	# done
	# clustername=$(cat manuscript/deeptools/complete_clusters${clusters}_size.txt)
	# printf "computing complete matrix for all peaks (pass2)\n"
	# ## pass 2: matrix recomputed on the clustered regions, region order kept; the heatmap overwrites the pass-1 PDF of the same name
	# computeMatrix scale-regions -R manuscript/deeptools/regions_complete_k${clusters}.txt -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/complete.gz
	# plotHeatmap -m manuscript/deeptools/complete.gz -out manuscript/plots/heatmap_complete_k${clusters}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar"
# done


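###### To reorganize regions from the k10 clustering (reads manuscript/deeptools/regions_complete_k10.txt, i.e. the output of the k-means loop above run with clusters=10)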

# rm manuscript/deeptools/temp3_*.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_7" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_5" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_4" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_6" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_9" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed
# awk -v OFS="\t" '$13=="cluster_8" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD2_MBD5_MBD6.bed
# awk -v OFS="\t" '$13=="cluster_10" {print $1,$2,$3}' manuscript/deeptools/regions_complete_k10.txt >> manuscript/deeptools/temp3_MBD1_MBD2.bed

# ## Build the "<group>(<number of regions>)" labels of the reorganized groups, in the order listed in the loop.
# ## The labels file is removed first because the loop only appends to it.
# rm manuscript/deeptools/regions_name_reorganized.txt
# for name in MBD5_MBD6 MBD5_MBD6_SUVH1_SUVH3 MBD2_MBD5_MBD6 MBD1_MBD2
# do
	# ## label = group name followed by the line count of its temp3_<group>.bed file in parentheses
	# label=$(wc -l manuscript/deeptools/temp3_${name}.bed | awk -v i=$name '{print i"("$1")"}')
	# printf "$label " >> manuscript/deeptools/regions_name_reorganized.txt
# done
# ## space-separated labels, later passed unquoted to --regionsLabel (one word per group)
# clustername=$(cat manuscript/deeptools/regions_name_reorganized.txt)
# printf "cluster names: $clustername\n"

# printf "computing matrix with reorganized clusters\n"
# computeMatrix scale-regions -R manuscript/deeptools/temp3_MBD5_MBD6.bed manuscript/deeptools/temp3_MBD5_MBD6_SUVH1_SUVH3.bed manuscript/deeptools/temp3_MBD2_MBD5_MBD6.bed manuscript/deeptools/temp3_MBD1_MBD2.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_regorganized.gz

# printf "plotting heatmap for reorganized peaks\n"	
# plotHeatmap -m manuscript/deeptools/matrix_regorganized.gz -out manuscript/plots/heatmap_complete_reorganized.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_reorganized.txt


######################################################################################################################################################################
#### Looking at each TE family (by superfamily) individually

# ### Build the per-family and per-superfamily TE bed files used by the plots of this section.
# ### TEs/all_superfamilies.txt is read as three whitespace-separated fields per line: family, superfamily, expected TE count.
# ### For each family this writes TEs/<super>_<fam>.bed (columns 1,2,3,5,".",6 of TEs/all_TEs.bed), appends the same lines
# ### to TEs/superfam_<super>.bed, and logs "<name> <count>" in TEs/list_superfam.txt and TEs/list_<super>.txt.
# ### superfam_*.bed and list_*.txt are only ever appended to, so both are removed first to keep reruns from duplicating entries.
# rm -f TEs/list_*.txt TEs/superfam_*.bed
# while read -r fam super totnb
# do
	# ### extract the family once, then reuse that file for the superfamily bed and for the count
	# grep -w "$fam" TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' > "TEs/${super}_${fam}.bed"
	# cat "TEs/${super}_${fam}.bed" >> "TEs/superfam_${super}.bed"
	# nb=$(awk 'END {print NR}' "TEs/${super}_${fam}.bed")
	# printf '%s %s\n' "$super" "$nb" >> TEs/list_superfam.txt
	# printf '%s %s\n' "$fam" "$nb" >> "TEs/list_${super}.txt"
	# ### sanity check against the expected count given in the input table
	# if [[ "$nb" != "$totnb" ]]
	# then
		# printf 'ERROR\n%s is a %s TE and is supposed to have %s TEs (%s found)\nERROR\n\n' "$fam" "$super" "$totnb" "$nb"
	# fi
# done < TEs/all_superfamilies.txt
# supername=$(awk '{print $1}' TEs/list_superfam.txt | sort | uniq | awk -v ORS=" " '{print $1}')
# printf "$supername\n"

# rm TEs/label_superfam.txt
# for super in $supername
# do
	# awk -v s=$super '$1==s {t+=$2} END {print s"("t")"}' TEs/list_superfam.txt >> TEs/label_superfam.txt
# done
# superlabel=$(awk -v ORS=" " '{print $1}' TEs/label_superfam.txt)
# printf "$superlabel\n"

# printf "computing matrix for all TEs superfamilies\n"
# computeMatrix reference-point --referencePoint "TSS" -R TEs/superfam_*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 6000 -p 10 -o TEs/matrix_TEs_superfam.gz

# printf "plotting heatmap for all TEs superfamilies\n"	
# plotHeatmap -m TEs/matrix_TEs_superfam.gz -out TEs/heatmap_TEs_superfamilies.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --regionsLabel ${superlabel} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions TEs/regions_TEs_superfam.txt

# for super in $supername
# do
	# mean=$(awk '{l=$3-$2; s+=l; t+=1} END {n=(s/t)/100; print int(n)*100}' TEs/superfam_${super}.bed)
	# max=$(awk 'BEGIN {m=0} {l=$3-$2; if (l>m) m=l} END {n=(m+500)/100; print int(n)*100}' TEs/superfam_${super}.bed)
	# printf "Superfamily: ${super}\tMean length TEs: ${mean}\tMax length TEs: ${max}\n"
	# famname=$(awk '{print $1}' TEs/list_${super}.txt | sort | uniq | awk -v ORS=" " '{print $1}')
	# printf "TE families: $famname\n"
	# rm TEs/label_${super}.txt
	# for fam in $famname
	# do
		# awk -v f=$fam '$1==f {t+=$2} END {print f"("t")"}' TEs/list_${super}.txt >> TEs/label_${super}.txt
	# done
	# famlabel=$(awk -v ORS=" " '{print $1}' TEs/label_${super}.txt)
	# printf "famlabel: $famlabel\n"
	
	# printf "computing v2 matrix for ${super} superfamily\n" 
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/${super}_*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/mC_WT_seedlings.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 500 -a ${mean} -p 10 -o TEs/matrix_TEs_${super}_v2.gz

	# printf "plotting v2 heatmap for ${super} superfamily\n"	
	# plotHeatmap -m TEs/matrix_TEs_${super}_v2.gz -out TEs/heatmap_TEs_${super}_v2.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Oranges' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT mC RNA Genes TEs --regionsLabel ${famlabel} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions TEs/regions_TEs_${super}.txt
		
	# printf "computing matrix for plotting ChIP profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_ChIP.gz
	
	# printf "plotting ChIP profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_ChIP.gz -out TEs/profile_TEs_${super}_chip_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "log2 ChIP vs Input" --numPlotsPerRow 4 --perGroup

	# printf "computing matrix for plotting mCG profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_mCG.gz	
	
	# printf "plotting mCG profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_mCG.gz -out TEs/profile_TEs_${super}_mCG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup
	
	# printf "computing matrix for plotting mCHG profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_mCHG.gz	
	
	# printf "plotting mCHG profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_mCHG.gz -out TEs/profile_TEs_${super}_mCHG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup
	
	# printf "computing matrix for plotting mCHH profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_mCHH.gz	
	
	# printf "plotting mCHH profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_mCHH.gz -out TEs/profile_TEs_${super}_mCHH_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup
	
	# printf "computing matrix for plotting CG density profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S annotations/CG_density.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_CGdensity.gz --missingDataAsZero
	
	# printf "plotting CG density profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_CGdensity.gz -out TEs/profile_TEs_${super}_CGdensity_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "AU" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_TEs_${super}_CGdensity.gz -out TEs/profile_TEs_${super}_CGdensity_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "AU" --numPlotsPerRow 4 --perGroup
	
	# printf "computing matrix for plotting H3 profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S histones/WT_H3.bw histones/MBD5_6_H3.bw histones/MBD2_5_6_H3.bw histones/MBD1_2_5_6_H3.bw histones/MBD1_2_4_H3.bw histones/SUVH1_3_H3.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_H3.gz	
	
	# printf "plotting other profiles for ${super} superfamily\n"
	# ### both plots read the H3 matrix, so both carry the H3 axis label (median and mean only differ in --averageType)
	# plotProfile -m TEs/matrix_TEs_${super}_H3.gz -out TEs/profile_TEs_${super}_H3_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD2/5/6 MBD1/2/5/6 MBD1/2/4 SUVH1/3 --regionsLabel ${famlabel} --yAxisLabel "H3 (logvsInput)" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_TEs_${super}_H3.gz -out TEs/profile_TEs_${super}_H3_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD2/5/6 MBD1/2/5/6 MBD1/2/4 SUVH1/3 --regionsLabel ${famlabel} --yAxisLabel "H3 (logvsInput)" --numPlotsPerRow 4 --perGroup

	# printf "computing matrix for plotting H3K9me2 profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S histones/WT_H3K9me2.bw histones/MBD5_6_H3K9me2.bw histones/MBD2_5_6_H3K9me2.bw histones/MBD1_2_5_6_H3K9me2.bw histones/MBD1_2_4_H3K9me2.bw histones/SUVH1_3_H3K9me2.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_H3K9me2.gz	
	
	# printf "plotting other profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_H3K9me2.gz -out TEs/profile_TEs_${super}_H3K9me2_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD2/5/6 MBD1/2/5/6 MBD1/2/4 SUVH1/3 --regionsLabel ${famlabel} --yAxisLabel "H3K9me2 (logvsH3)" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_TEs_${super}_H3K9me2.gz -out TEs/profile_TEs_${super}_H3K9me2_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD2/5/6 MBD1/2/5/6 MBD1/2/4 SUVH1/3 --regionsLabel ${famlabel} --yAxisLabel "H3K9me2 (logvsH3)" --numPlotsPerRow 4 --perGroup
	
	# printf "computing matrix for plotting H3K27me3 profiles of ${super} superfamily\n" 
	# computeMatrix scale-regions -R TEs/${super}_*.bed -S histones/WT_H3K27me3.bw histones/MBD2_5_6_H3K27me3.bw histones/MBD1_2_5_6_H3K27me3.bw histones/MBD1_2_4_H3K27me3.bw histones/SUVH1_3_H3K27me3.bw -bs 20 -b 500 -a 500 -m ${mean} -p 10 -o TEs/matrix_TEs_${super}_H3K27me3.gz	
	
	# printf "plotting other profiles for ${super} superfamily\n"
	# plotProfile -m TEs/matrix_TEs_${super}_H3K27me3.gz -out TEs/profile_TEs_${super}_H3K27me3_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD2/5/6 MBD1/2/5/6 MBD1/2/4 SUVH1/3 --yAxisLabel "H3K27me3 (logvsH3)" --numPlotsPerRow 4 --perGroup
# done

######################################################################################################################################################################
#### Looking at each TE family individually

# ### Split the TE families into a "short" and a "long" group for the reference-point plots below.
# ### For each line of TEs/all_superfamilies.txt (family, superfamily, expected count) the family is extracted from
# ### TEs/all_TEs.bed, its TE count is checked against the expected count, and its mean TE length (rounded down to a
# ### multiple of 100 bp) decides whether the bed is renamed indiv_short_<fam>.bed (<= 1000) or indiv_long_<fam>.bed.
# ### "<family> <count>" is logged in TEs/names.txt and in TEs/names_short.txt or TEs/names_long.txt.
# rm -f TEs/names*
# while read -r fam super totnb
# do
	# grep -w "$fam" TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' > "TEs/indiv_${fam}.bed"
	# ### count the file written just above, so this section does not depend on TEs/<super>_<fam>.bed from the superfamily section
	# nb=$(awk 'END {print NR}' "TEs/indiv_${fam}.bed")
	# if [[ "$nb" != "$totnb" ]]
	# then
		# printf 'ERROR\n%s is a %s TE and is supposed to have %s TEs (%s found)\nERROR\n\n' "$fam" "$super" "$totnb" "$nb"
	# fi
	# ### an empty family would make the mean a division by zero; report 0 so it falls into the short group
	# mean=$(awk '{l=$3-$2; s+=l; t+=1} END {if (t==0) {print 0} else {n=(s/t)/100; print int(n)*100}}' "TEs/indiv_${fam}.bed")
	# if [[ "$mean" -le 1000 ]]
	# then
		# te_len_group="short"
	# else
		# te_len_group="long"
	# fi
	# printf '%s %s\n' "$fam" "$nb" >> TEs/names.txt
	# printf '%s %s\n' "$fam" "$nb" >> "TEs/names_${te_len_group}.txt"
	# mv "TEs/indiv_${fam}.bed" "TEs/indiv_${te_len_group}_${fam}.bed"
# done < TEs/all_superfamilies.txt
# name=$(awk '{print $1}' TEs/names.txt | sort | uniq | awk -v ORS=" " '{print $1}')
# shortname=$(awk '{print $1}' TEs/names_short.txt | sort | uniq | awk -v ORS=" " '{print $1}')
# longname=$(awk '{print $1}' TEs/names_long.txt | sort | uniq | awk -v ORS=" " '{print $1}')
# printf "$name\n"

# rm TEs/labels*
# for fam in $name
# do
	# awk -v f=$fam '$1==f {t+=$2} END {print f"("t")"}' TEs/names.txt >> TEs/labels.txt
# done
# label=$(awk -v ORS=" " '{print $1}' TEs/labels.txt)
# printf "$label\n"

# for fam in $shortname
# do
	# awk -v f=$fam '$1==f {t+=$2} END {print f"("t")"}' TEs/names_short.txt >> TEs/labels_short.txt
# done
# shortlabel=$(awk -v ORS=" " '{print $1}' TEs/labels_short.txt)

# for fam in $longname
# do
	# awk -v f=$fam '$1==f {t+=$2} END {print f"("t")"}' TEs/names_long.txt >> TEs/labels_long.txt
# done
# longlabel=$(awk -v ORS=" " '{print $1}' TEs/labels_long.txt)

### scale regions by superfamily

# ### ${super} is empty here (the read loop that set it has already ended), so the message names the actual input: all families
# printf "computing matrix for plotting ChIP profiles of all families\n"
# computeMatrix scale-regions -R TEs/indiv_*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 500 -a 500 -m 1000 -p 10 -o TEs/matrix_TEs_indiv_ChIP.gz
	
# printf "plotting ChIP profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_ChIP.gz -out TEs/profile_TEs_indiv_chip_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "log2 ChIP vs Input" --numPlotsPerRow 4 --perGroup

# printf "computing matrix for plotting mCG profiles of all families\n" 
# computeMatrix scale-regions -R TEs/indiv_*.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o TEs/matrix_TEs_indiv_mCG.gz
	
# printf "plotting mCG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_mCG.gz -out TEs/profile_TEs_indiv_mCG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHG profiles of all families\n" 
# computeMatrix scale-regions -R TEs/indiv_*.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o TEs/matrix_TEs_indiv_mCHG.gz	
	
# printf "plotting mCHG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_mCHG.gz -out TEs/profile_TEs_indiv_mCHG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHH profiles of all families\n" 
# computeMatrix scale-regions -R TEs/indiv_*.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o TEs/matrix_TEs_indiv_mCHH.gz	
	
# printf "plotting mCHH profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_mCHH.gz -out TEs/profile_TEs_indiv_mCHH_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting CG density profiles of all families\n" 
# computeMatrix scale-regions -R TEs/indiv_*.bed -S annotations/CG_density.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o TEs/matrix_TEs_indiv_CGdensity.gz --missingDataAsZero
	
# printf "plotting CG density profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_CGdensity.gz -out TEs/profile_TEs_indiv_CGdensity_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "AU" --numPlotsPerRow 4 --perGroup

### ref-point for short families (mean length, rounded down to a multiple of 100 bp, <= 1 kb)

# printf "computing matrix for plotting ChIP profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_short*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 500 -a 1500 -p 10 -o TEs/matrix_TEs_indiv_short_ChIP.gz
	
# printf "plotting ChIP profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_short_ChIP.gz -out TEs/profile_TEs_indiv_short_chip_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "log2 ChIP vs Input" --numPlotsPerRow 4 --perGroup

# printf "computing matrix for plotting mCG profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_short*.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1500 -p 10 -o TEs/matrix_TEs_indiv_short_mCG.gz
	
# printf "plotting mCG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_short_mCG.gz -out TEs/profile_TEs_indiv_short_mCG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHG profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_short*.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1500 -p 10 -o TEs/matrix_TEs_indiv_short_mCHG.gz	
	
# printf "plotting mCHG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_short_mCHG.gz -out TEs/profile_TEs_indiv_short_mCHG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHH profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_short*.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1500 -p 10 -o TEs/matrix_TEs_indiv_short_mCHH.gz	
	
# printf "plotting mCHH profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_short_mCHH.gz -out TEs/profile_TEs_indiv_short_mCHH_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting CG density profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_short_*.bed -S annotations/CG_density.bw -bs 20 -b 1000 -a 1500 -p 10 -o TEs/matrix_TEs_indiv_short_CGdensity.gz --missingDataAsZero
	
# printf "plotting CG density profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_short_CGdensity.gz -out TEs/profile_TEs_indiv_short_CGdensity_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel CG_density --yAxisLabel "AU" --numPlotsPerRow 4 --perGroup

# ### ref-point for long families (mean > 1kb)

# printf "computing matrix for plotting ChIP profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_long*.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 500 -a 5000 -p 10 -o TEs/matrix_TEs_indiv_long_ChIP.gz
	
# printf "plotting ChIP profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_long_ChIP.gz -out TEs/profile_TEs_indiv_long_chip_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --samplesLabel MBD4 MBD1 MBD2 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "log2 ChIP vs Input" --numPlotsPerRow 4 --perGroup

# printf "computing matrix for plotting mCG profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_long*.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 5000 -p 10 -o TEs/matrix_TEs_indiv_long_mCG.gz
	
# printf "plotting mCG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_long_mCG.gz -out TEs/profile_TEs_indiv_long_mCG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHG profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_long*.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 5000 -p 10 -o TEs/matrix_TEs_indiv_long_mCHG.gz	
	
# printf "plotting mCHG profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_long_mCHG.gz -out TEs/profile_TEs_indiv_long_mCHG_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting mCHH profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_long*.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 5000 -p 10 -o TEs/matrix_TEs_indiv_long_mCHH.gz	
	
# printf "plotting mCHH profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_long_mCHH.gz -out TEs/profile_TEs_indiv_long_mCHH_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup
	
# printf "computing matrix for plotting CG density profiles of all families\n" 
# computeMatrix reference-point --referencePoint "TSS" -R TEs/indiv_long_*.bed -S annotations/CG_density.bw -bs 20 -b 1000 -a 5000 -p 10 -o TEs/matrix_TEs_indiv_long_CGdensity.gz --missingDataAsZero
	
# printf "plotting CG density profiles for all families\n"
# plotProfile -m TEs/matrix_TEs_indiv_long_CGdensity.gz -out TEs/profile_TEs_indiv_long_CGdensity_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel CG_density --yAxisLabel "AU" --numPlotsPerRow 4 --perGroup

######## Choose families based on ChIP enrichments

### SUVH1/3 only: 	(long) VANDAL16 VANDAL20 (META1 VANDAL14 VANDAL15 VANDAL21 VANDAL7 VANDAL9)
###					(short) AT9NMU1 ATN9_1 (ATMUN1) ATDNA2T9B ATDNAI27T9A ATDNAI27T9B ATPOGON1 ATPOGON2 ATPOGON3 ATREP10A ATREP10B ATREP10D ATREP11A ATREP11 ATREP12 ATREP13 ATREP14 ATREP15 ATREP16 ATREP17 ATREP19 ATREP1 ATREP2A ATREP2 ATREP3 ATREP4 ATREP5 ATREP6 ATREP7 ATREP8 REP1 VANDAL18NA VANDAL18NB VANDAL1N1 VANDAL22 VANDAL2N1 VANDALNX1

### MBD5/6 only: (long) ATGP2 ATGP2N ATLANTYS1 ATLANTYS2 ATLINE1_1 ATLINE1_2 (ATGP1 ATLANTYS3) ATGP10 SADHU TAT1_ATH TSCL VANDAL12 

### SUVH1/3 + MBD5/6: ATDNA12T3_2 ATHATN10 ATHATN1 ATHATN2 ATHATN3A ATHATN3 ATHATN4 ATHATN5 ATHATN7 ATMU2 ATMU3N1 ATMU4 ATMU6 ATMU6N1 ATMU7 ATMUN2 ATHATN6 ATLINE1A ATMU10 ATMU1 ATREP10 BOMZH1 BOMZH2 ATSINE4 RathE1_cons RathE2_cons RathE3_cons SIMPLEGUY1 SIMPLEHAT1 SIMPLEHAT2 TAG3N1 ATMU3

### MBD1: ATGP9LTR

#### splitting TEs into borders and middle

# rm TEs/split_*
# printf "TE_ID\tSuperfamily\tFamily\tPosition\tMBD1\tMBD2\tMBD4\tMBD5\tMBD6\tSUVH1\tSUVH3\tWT\tCGdensity\tmCG_WT\tmCHG_WT\tmCHH_WT\tmCG_MBD5/6\tmCHG_MBD5/6\tmCHH_MBD5/6\tmCG_SUVH1/3\tmCHG_SUVH1/3\tmCHH_SUVH1/3\n" > TEs/Total_TEs_enrichment.txt
# while read fam super totnb
# do
	# grep -w $fam TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' | sort -k1,1n -k2,2n > TEs/indiv_${fam}.bed
	# awk -v OFS="\t" -v f=$fam '{n=($3-$2)/100; a=$2+int(10*n); b=$3-int(10*n); c=$2+int(25*n); d=$3-int(25*n); print $1,$2,a,$4"_"NR,$5,$6 >> "TEs/split_5prime_"f".bed"; print $1,b,$3,$4"_"NR,$5,$6 >> "TEs/split_3prime_"f".bed"; print $1,c,d,$4"_"NR,$5,$6 >> "TEs/split_mid_"f".bed"}' TEs/indiv_${fam}.bed
	# for type in 5prime 3prime mid
	# do
		# multiBigwigSummary BED-file -b manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw annotations/CG_density.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CHG_MBD5_MBD6.bw methyl/CHH_MBD5_MBD6.bw methyl/CG_SUVH1_SUVH3.bw methyl/CHG_SUVH1_SUVH3.bw methyl/CHH_SUVH1_SUVH3.bw -o TEs/split_${type}_${fam}.npz -p 10 --BED TEs/split_${type}_${fam}.bed --outRawCounts TEs/split_${type}_${fam}.tab
		# sort -k1,1n -k2,2n TEs/split_${type}_${fam}.tab | awk -v OFS="\t" -v s=$super -v f=$fam -v t=$type 'NR>1 {n=NR-1; $1=s; $2=f; $3=t; print f"_"n,$0}' >> TEs/Total_TEs_enrichment.txt
	# done
# done < TEs/all_superfamilies.txt

# head TEs/Total_TEs_enrichment.txt
# tail TEs/Total_TEs_enrichment.txt

# #### getting average over whole TE

# printf "TE_ID\tSuperfamily\tFamily\tLength\tMBD1\tMBD2\tMBD4\tMBD5\tMBD6\tSUVH1\tSUVH3\tWT\tmCG_WT\tmCHG_WT\tmCHH_WT\tmCG_MBD5_6\tmCHG_MBD5_6\tmCHH_MBD5_6\tmCG_SUVH1_3\tmCHG_SUVH1_3\tmCHH_SUVH1_3\tPeaks_MBD1\tPeaks_MBD2\tPeaks_MBD4\tPeaks_MBD5\tPeaks_MBD6\tPeaks_SUVH1\tPeaks_SUVH3\tPeaks_WT\n" > TEs/Total_TEs_means.txt

# ### Append one row per TE to TEs/Total_TEs_means.txt: TE id (<fam>_<row number>), superfamily, family, length (end - start),
# ### the per-bigWig values written by multiBigwigSummary --outRawCounts, then one Yes/No column per sample telling whether
# ### the TE overlaps at least one peak of manuscript/peaks_<sample>.bed.
# ### The summary rows are sorted (chromosome, start) before being pasted next to the peak columns, so the bed given to
# ### bedtools is sorted with the same keys; otherwise the Yes/No columns could end up on the wrong TE.
# ### NOTE(review): only summary rows starting with 1-9 are kept; if TEs/all_TEs.bed has TEs on other sequence names the peak files will have extra rows - confirm.
# while read -r fam super totnb
# do
	# printf '%s %s %s\n' "$super" "$fam" "$totnb"
	# grep -w "$fam" TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3}' | sort -k1,1n -k2,2n > "TEs/indiv_${fam}.bed"
	# multiBigwigSummary BED-file -b manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CHG_MBD5_MBD6.bw methyl/CHH_MBD5_MBD6.bw methyl/CG_SUVH1_SUVH3.bw methyl/CHG_SUVH1_SUVH3.bw methyl/CHH_SUVH1_SUVH3.bw -o "TEs/indiv_${fam}.npz" -p 10 --BED "TEs/indiv_${fam}.bed" --outRawCounts "TEs/mean_${fam}.tab"
	# nb=$(grep -c '^[1-9]' "TEs/mean_${fam}.tab")
	# printf 'bigwig summary for %s: %s\n' "$fam" "$nb"
	# ### collect the peak files explicitly, in header order; a glob on peaks_${fam}_* would also match families whose name starts with "${fam}_"
	# peakfiles=()
	# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
	# do
		# bedtools intersect -c -a "TEs/indiv_${fam}.bed" -b "manuscript/peaks_${sample}.bed" | awk -v OFS="\t" '{if ($4>0) p="Yes"; else p="No"; print p}' > "TEs/peaks_${fam}_${sample}.txt"
		# peakfiles+=("TEs/peaks_${fam}_${sample}.txt")
	# done
	# sort -k1,1n -k2,2n "TEs/mean_${fam}.tab" | awk -v OFS="\t" '$0 ~ /^[1-9]/' | paste --delimiters="\t" - "${peakfiles[@]}" | awk -v OFS="\t" -v s="$super" -v f="$fam" '{l=$3-$2; $1=s; $2=f; $3=l; print f"_"NR,$0}' >> TEs/Total_TEs_means.txt
# done < TEs/all_superfamilies.txt

# head TEs/Total_TEs_means.txt
# tail TEs/Total_TEs_means.txt

# rm TEs/*.npz

#############################################################################################################################################################################
####### 

#### To get RNA difference between mutant and WT

# for strand in for rev
# do
	# printf "WT $strand a+b\n"
	# bigwigCompare -b1 rna/WT_a_${strand}.bw -b2 rna/WT_b_${strand}.bw --operation add -bs 1 -p 10 -o rna/WT_temp_${strand}.bw
	# printf "WT $strand a+b+c\n"
	# bigwigCompare -b1 rna/WT_temp_${strand}.bw -b2 rna/WT_c_${strand}.bw --operation add -bs 1 -p 10 -o rna/WT_sum_${strand}.bw
# done

# for sample in suvh1_3 mbd1_2_5_6 mbd1_2_4 mbd2_5_6
# do
	# for strand in for rev
	# do
		# printf "$sample $strand a+b\n"
		# bigwigCompare -b1 rna/${sample}_a_${strand}.bw -b2 rna/${sample}_b_${strand}.bw --operation add -bs 1 -p 10 -o rna/${sample}_temp_${strand}.bw
		# printf "$sample $strand a+b+c\n"
		# bigwigCompare -b1 rna/${sample}_temp_${strand}.bw -b2 rna/${sample}_c_${strand}.bw --operation add -bs 1 -p 10 -o rna/${sample}_sum_${strand}.bw
		# printf "$sample $strand ratio WT\n"
		# bigwigCompare -b1 rna/${sample}_sum_${strand}.bw -b2 rna/WT_sum_${strand}.bw --operation ratio --pseudocount 0.1 -bs 1 -p 10 -o rna/${sample}_${strand}_FC.bw
		# if [[ $strand == "rev" ]]
		# then
			# bigwigCompare -b1 rna/${sample}_${strand}_FC.bw -b2 annotations/chromsize_null.bw --operation subtract -bs 1 -p 10 -o rna/${sample}_${strand}_minusFC.bw
		# fi
	# done
# done

# rm rna/*temp*.bw

# #### Plotting deeptools over bound TEs
# grep -v "TE:" annotations/all_annotations.bed > Genes/all_kind_of_genes.bed

# rm TEs/temp_super_*.bed
# rm TEs/names.txt
# while read fam super totnb
# do
	# printf "$super $fam $totnb\n"
	# awk -v s=$super 'BEGIN {print s}' >> TEs/names.txt
	# grep -w $fam TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3}' >> TEs/temp_super_${super}.bed
# done < TEs/all_superfamilies.txt
# name=$(awk '{print $1}' TEs/names.txt | sort | uniq | awk -v ORS=" " '{print $1}')

# for super in $name
# do
	# sort -k1,1n -k2,2n TEs/temp_super_${super}.bed > TEs/super_${super}.bed
	# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
	# do
		# bedtools intersect -wa -a TEs/super_${super}.bed -b manuscript/peaks_${sample}.bed > TEs/chip_${sample}_bound_${super}.txt
	# done
	# cat TEs/chip_*_bound_${super}.txt | sort -k1,1n -k2,2n | uniq > TEs/bound_${super}.bed
	# nbound=$(wc -l TEs/bound_${super}.bed | awk '{print $1}')
	# bedtools intersect -v -f 1 -r -wa -a TEs/super_${super}.bed -b TEs/bound_${super}.bed > TEs/unbound_${super}.bed
	# nunbound=$(wc -l TEs/unbound_${super}.bed | awk '{print $1}')
	# ntot=$(wc -l TEs/super_${super}.bed | awk '{print $1}')
	# ### consistency check: every TE of the superfamily must be counted exactly once, as bound or as unbound
	# if (( nbound + nunbound != ntot ))
	# then
		# printf 'Problem! %s bound + %s unbound is not equal to %s total\nMissing lines are:\n' "$nbound" "$nunbound" "$ntot"
		# ### -3 drops the lines common to both inputs, leaving only the lines present on one side
		# cat TEs/bound_${super}.bed TEs/unbound_${super}.bed | sort -k1,1n -k2,2n | comm -3 - TEs/super_${super}.bed | head
		# ### stop with a failure status; a bare "exit" would return the status of the head command above
		# exit 1
	# fi
	# label=$(printf "Bound(${nbound}) Unbound(${nunbound})")
	# printf "computing ChIP matrix for $nbound bound and $nunbound unbound TEs from $ntot $super\n"
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_chip.gz

	# printf "plotting ChIP heatmap for $super TEs\n"	
	# plotHeatmap -m TEs/matrix_${super}_chip.gz -out TEs/heatmap_${super}_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT Genes TEs --regionsLabel $label --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 --whatToShow "heatmap and colorbar"
	# printf "plotting chip profile for $super TEs\n"
	# ### the output is named _chip like the ChIP heatmap of the same matrix, so it cannot be mistaken for a CHH methylation profile
	# plotProfile -m TEs/matrix_${super}_chip.gz -out TEs/profile_${super}_chip.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT Genes TEs --yAxisLabel "Log2FC (HA ChIP vs Input)" --numPlotsPerRow 4 --perGroup
	
	# printf "computing CG methylation matrix for $nbound bound and $nunbound unbound TEs from $ntot $super\n"
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_4.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_mbd2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 100 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_CG.gz

	# printf "plotting CG methylation heatmap for $super TEs\n"
	# plotHeatmap -m TEs/matrix_${super}_CG.gz -out TEs/heatmap_${super}_CG.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting CG methylation profile for $super TEs\n"
	# plotProfile -m TEs/matrix_${super}_CG.gz -out TEs/profile_${super}_CG.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup --yMin 0
	
	# printf "computing CHG methylation matrix for $nbound bound and $nunbound unbound TEs from $ntot $super\n"
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_4.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_mbd2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 100 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_CHG.gz

	# printf "plotting CHG methylation heatmap for $super TEs\n"
	# plotHeatmap -m TEs/matrix_${super}_CHG.gz -out TEs/heatmap_${super}_CHG.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting CHG methylation profile for $super TEs\n"
	# plotProfile -m TEs/matrix_${super}_CHG.gz -out TEs/profile_${super}_CHG.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup --yMin 0
	
	# printf "computing CHH methylation matrix for $nbound bound and $nunbound unbound TEs from $ntot $super\n"
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_4.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_mbd2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 100 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_CHH.gz

	# printf "plotting CHH methylation heatmap for $super TEs\n"
	# plotHeatmap -m TEs/matrix_${super}_CHH.gz -out TEs/heatmap_${super}_CHH.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting CHH methylation profile for $super TEs\n"
	# plotProfile -m TEs/matrix_${super}_CHH.gz -out TEs/profile_${super}_CHH.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup --yMin 0
	
	# printf "computing rna matrix for $nbound bound and $nunbound unbound TEs from $ntot $super\n" suvh1_3 mbd1_2_5_6 mbd1_2_4 mbd2_5_6
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S rna/mbd1_2_4_log2.bw rna/mbd1_2_5_6_log2.bw rna/mbd2_5_6_log2.bw rna/suvh1_3_log2.bw -bs 50 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_rna.gz

	# printf "plotting rna heatmap for $super TEs\n"
	# plotHeatmap -m TEs/matrix_${super}_rna.gz -out TEs/heatmap_${super}_rna.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting rna profile for $super TEs\n"
	# plotProfile -m TEs/matrix_${super}_rna.gz -out TEs/profile_${super}_rna.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_${super}_rna.gz -out TEs/profile_${super}_rna_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA" --numPlotsPerRow 4 --perGroup
	
	# printf "computing CG density matrix for $super TEs\n" 
	# computeMatrix reference-point --referencePoint "TSS" -R TEs/bound_${super}.bed TEs/unbound_${super}.bed -S annotations/CG_density.bw -bs 20 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_density.gz --missingDataAsZero
	### NOTE(review): the heatmap reads TEs/matrix_${super}_density_reg.gz, but the computeMatrix call
	### directly above writes TEs/matrix_${super}_density.gz (the name the plotProfile calls below also
	### read). No command visible in this section produces the "_reg" matrix, so the -m path should
	### most likely be TEs/matrix_${super}_density.gz.
	# printf "plotting CG density heatmap for $super TEs\n"
	# plotHeatmap -m TEs/matrix_${super}_density_reg.gz -out TEs/heatmap_${super}_density.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --refPointLabel "TSS" --xAxisLabel "TEs" --samplesLabel CG_density --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting CG density profiles for $super TEs\n"
	# plotProfile -m TEs/matrix_${super}_density.gz -out TEs/profile_${super}_density_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --refPointLabel "TSS" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_${super}_density.gz -out TEs/profile_${super}_density_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --refPointLabel "TSS" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup
	
	# ##### Get the closest genes from bound and unbound TEs and plot heatmaps
	# rm TEs/problem.bed
	### NOTE(review): a plain `rm` prints an error when TEs/problem.bed does not exist (the normal case
	### after a clean run); `rm -f` would keep the log quiet.
	### NOTE(review): the two awk programs below split the `bedtools closest` output into one file per
	### strand (for_/rev_ prefix, taken from column 9) and per distance class (taken from column 10).
	### This presumably relies on column 9 being the gene strand and column 10 the signed distance
	### added by `-D b` - confirm against the column layout of the input BED files.
	### NOTE(review): in both awk programs the branch `$10<-500 && $9=="-"` writes to
	### "TEs/rev_<super>_temp_(un)bound_up_500to100.bed", whereas the matching "+" branch and the file
	### list in the `for strand` loop below use "..._up_500to1000.bed". As written, rev-strand genes
	### 500-1000 bp upstream land in a file that is never read back. Fix: rename "500to100" to "500to1000".
	# bedtools closest -a TEs/bound_${super}.bed -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super '{if ($10==0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_0.bed"; else if ($10==0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_0.bed"; else if ($10>1000 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_down_over1000.bed"; else if ($10>1000 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_down_over1000.bed"; else if ($10>500 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_down_500to1000.bed"; else if ($10>500 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_down_500to1000.bed"; else if ($10>0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_down_0to500.bed"; else if ($10>0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_down_0to500.bed"; else if ($10<-1000 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_up_over1000.bed"; else if ($10<-1000 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_up_over1000.bed"; else if ($10<-500 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_up_500to1000.bed"; else if ($10<-500 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_up_500to100.bed"; else if ($10<0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_bound_up_0to500.bed"; else if ($10<0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_bound_up_0to500.bed"; else print "ERROR",$0 > "TEs/problem.bed"}'
	# bedtools closest -a TEs/unbound_${super}.bed -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super '{if ($10==0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_0.bed"; else if ($10==0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_0.bed"; else if ($10>1000 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_down_over1000.bed"; else if ($10>1000 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_down_over1000.bed"; else if ($10>500 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_down_500to1000.bed"; else if ($10>500 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_down_500to1000.bed"; else if ($10>0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_down_0to500.bed"; else if ($10>0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_down_0to500.bed"; else if ($10<-1000 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_up_over1000.bed"; else if ($10<-1000 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_up_over1000.bed"; else if ($10<-500 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_up_500to1000.bed"; else if ($10<-500 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_up_500to100.bed"; else if ($10<0 && $9=="+") print $4,$5,$6,$7,$10,$9>"TEs/for_"s"_temp_unbound_up_0to500.bed"; else if ($10<0 && $9=="-") print $4,$5,$6,$7,$10,$9>"TEs/rev_"s"_temp_unbound_up_0to500.bed"; else print "ERROR",$0 >> "TEs/problem.bed"}'
	# printf "Is there a problem?\n"
	# if [ -f TEs/problem.bed ] 
	# then
		# printf "Yes\n"
		# cat TEs/problem.bed
		# exit
	# else
		# printf "No\n"
	# fi
	# for strand in for rev
	# do
		### Builds TEs/files.txt with one line (path, class label, line count) per distance-class file
		### that exists for this strand, then flattens it into the space-separated $files and $labels
		### lists passed to computeMatrix (-R) and to --regionsLabel below.
		### NOTE(review): `rm` without -f errors when TEs/files.txt is absent; and if none of the
		### listed files exist, TEs/files.txt is never created and the awk calls reading it fail.
		# rm TEs/files.txt
		### NOTE(review): the list contains "temp_unbound_up_0.bed", but the awk step above writes the
		### overlapping-gene class to "..._temp_unbound_0.bed" (no "up_"), mirroring "temp_bound_0.bed".
		### As written, the unbound distance-0 class is silently skipped by the -f test.
		### NOTE(review): these class files are only (re)written when the awk step has rows for them and
		### do not match the later `rm TEs/temp*` (their names start with for_/rev_), so a file left
		### from a previous run can still pass the -f test - confirm they are cleaned elsewhere.
		# for file in TEs/${strand}_${super}_temp_bound_0.bed TEs/${strand}_${super}_temp_bound_up_0to500.bed TEs/${strand}_${super}_temp_bound_up_500to1000.bed TEs/${strand}_${super}_temp_bound_up_over1000.bed TEs/${strand}_${super}_temp_bound_down_0to500.bed TEs/${strand}_${super}_temp_bound_down_500to1000.bed TEs/${strand}_${super}_temp_bound_down_over1000.bed TEs/${strand}_${super}_temp_unbound_up_0.bed TEs/${strand}_${super}_temp_unbound_up_0to500.bed TEs/${strand}_${super}_temp_unbound_up_500to1000.bed TEs/${strand}_${super}_temp_unbound_up_over1000.bed TEs/${strand}_${super}_temp_unbound_down_0to500.bed TEs/${strand}_${super}_temp_unbound_down_500to1000.bed TEs/${strand}_${super}_temp_unbound_down_over1000.bed
		# do
			# if [ -f $file ]
			# then
				### Strip the directory and everything up to "temp_", then the ".bed" suffix,
				### leaving the class name (e.g. "bound_up_0to500") used as the region label.
				# namext=${file##*/*temp_}
				# name=${namext%.bed}
				# nb=$(wc -l $file | awk '{print $1}')
				# awk -v s=$file -v n=$nb -v t=$name 'BEGIN {print s,t,n}' >> TEs/files.txt
			# fi
			### NOTE(review): files/labels are rebuilt on every iteration although only the values
			### after the last iteration are used; both assignments could move below `done`.
			# files=$(awk '{print $1}' TEs/files.txt | awk -v ORS=" " '{print $1}')
			# labels=$(awk '{print $2"("$3")"}' TEs/files.txt | awk -v ORS=" " '{print $1}')
		# done
		# printf "$strand genes closest to $super TEs\n"
		# printf "computing ChIP matrix on\n$files\n$labels\n"
		# computeMatrix reference-point --referencePoint "TSS" -R $files -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 6000 -p 10 -o TEs/matrix_${super}_chip_${strand}.gz

		# printf "plotting ChIP heatmap for $super TEs\n"	
		# plotHeatmap -m TEs/matrix_${super}_chip_${strand}.gz -out TEs/heatmap_${super}_chip_${strand}.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --refPointLabel "TSS" --xAxisLabel $super --regionsLabel $labels --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 --whatToShow "heatmap and colorbar"
		
		### NOTE(review): this scale-regions matrix is written to TEs/matrix_${super}_chip_${strand}.gz,
		### the same path as the reference-point matrix computed above for the heatmap, so it replaces
		### that file (and, unlike it, omits the all_genes/all_TEs tracks). Use a distinct name
		### (e.g. a "_reg" suffix) if both matrices need to be kept.
		# printf "computing chip matrix\n"
		# computeMatrix scale-regions -R $files -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${super}_chip_${strand}.gz

		# printf "plotting chip profiles for $super TEs\n"	
		# plotProfile -m TEs/matrix_${super}_chip_${strand}.gz -out TEs/strand_profile_${super}_chip_${strand}_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --plotTitle $super --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $labels --yAxisLabel "ChIP (log2HAvsInput)" --numPlotsPerRow 4 --perGroup
		
		# printf "computing RNA matrix\n"
		# computeMatrix scale-regions -R $files -S rna/mbd1_2_4_${strand}_FC.bw rna/mbd1_2_5_6_${strand}_FC.bw rna/mbd2_5_6_${strand}_FC.bw rna/suvh1_3_${strand}_FC.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${super}_rna_${strand}.gz

		# printf "plotting RNA profiles for $super TEs\n"	
		# plotProfile -m TEs/matrix_${super}_rna_${strand}.gz -out TEs/strand_profile_${super}_rna_${strand}_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --plotTitle $super --samplesLabel mbd1/2/4_FC mbd1/2/5/6_FC mbd2/5/6_FC suvh1/3_FC --regionsLabel $labels --yAxisLabel "RNA (FCvsWT)" --numPlotsPerRow 4 --perGroup
		# plotProfile -m TEs/matrix_${super}_rna_${strand}.gz -out TEs/strand_profile_${super}_rna_${strand}_median.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --plotTitle $super --samplesLabel mbd1/2/4_FC mbd1/2/5/6_FC mbd2/5/6_FC suvh1/3_FC --regionsLabel $labels --yAxisLabel "RNA (FCvsWT)" --numPlotsPerRow 4 --perGroup
	# done
# done

# rm TEs/temp*

### To make one tab file for distance plotting by family

### Writes a header to TEs/Bound_genes_complete.txt, then for every (family, superfamily, count) line
### of TEs/all_superfamilies.txt: extracts the TEs whose all_TEs.bed line matches the family name,
### keeps those overlapping each sample's peaks, and appends one row per `bedtools closest` hit.
### The awk print assumes column 7 is the gene ID and column 10 the `-D b` distance - this matches
### a 3-column TE file followed by a 6-column gene BED, but confirm against Genes/all_kind_of_genes.bed.
# printf "Superfamily\tFamily\tSample\tGeneID\tDistance\n" > TEs/Bound_genes_complete.txt
# while read fam super totnb
# do
	# printf "$super $fam $totnb\n"
	# grep -w $fam TEs/all_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3}' | sort -k1,1n -k2,2n > TEs/temp_super_${super}_fam_${fam}.bed
	# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
	# do
		### NOTE(review): `intersect -wa` emits a TE once per overlapping peak and there is no `uniq`
		### here (the bound-vs-unbound loop further down does add `| uniq`), so a TE covered by several
		### peaks contributes duplicate rows to the table. Add `| uniq` (or use -u) if unintended.
		# bedtools intersect -wa -a TEs/temp_super_${super}_fam_${fam}.bed -b manuscript/peaks_${sample}.bed | sort -k1,1n -k2,2n > TEs/temp_${sample}_bound_${super}_${fam}.txt
		# bedtools closest -a TEs/temp_${sample}_bound_${super}_${fam}.txt -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super -v f=$fam -v p=$sample '{print s,f,p,$7,$10}' >> TEs/Bound_genes_complete.txt
	# done
# done < TEs/all_superfamilies.txt

### To make one tab file for distance plotting bound vs unbound (gene within 1kb of TE)

# printf "Superfamily\tFamily\tTE_AGI\tSample\tBound\tGeneID\tDistance\n" > TEs/Bound_unbound_genes_complete2.txt
### For every (family, superfamily, count) line of TEs/all_superfamilies.txt and every sample: splits
### the family's TEs into peak-overlapping ("Bound") and non-overlapping ("Unbound") sets and appends,
### for each TE whose closest gene lies within +/-1000 bp, one row to TEs/Bound_unbound_genes_complete2.txt.
### The awk filters assume column 10 is the gene ID and column 13 the `-D b` distance (6-column TE
### file followed by a 6-column gene BED) - confirm against Genes/all_kind_of_genes.bed.
# while read fam super totnb
# do
	# printf "$super $fam $totnb\n"
	# grep -w $fam TEs/all_TEs.bed | awk -v OFS="\t" -v f=$fam '{print $1,$2,$3,$4,".","."}' | sort -k1,1n -k2,2n > TEs/super_${super}_fam_${fam}.bed
	# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
	# do
		### NOTE(review): ntot does not depend on $sample; it could be computed once per family.
		# ntot=$(wc -l TEs/super_${super}_fam_${fam}.bed | awk '{print $1}')
		# bedtools intersect -wa -a TEs/super_${super}_fam_${fam}.bed -b manuscript/peaks_${sample}.bed | sort -k1,1n -k2,2n | uniq > TEs/temp_${sample}_bound_${super}_${fam}.txt
		# nbound=$(wc -l TEs/temp_${sample}_bound_${super}_${fam}.txt | awk '{print $1}')
		### NOTE(review): the `>` redirection two lines up always creates the file, even when no TE
		### overlaps a peak, so this `! -f` test cannot succeed and the "all unbound" branch is dead
		### code; the zero-bound case falls through to the final `else`. Testing `[[ $nbound == 0 ]]`
		### (or `! -s` for "missing or empty") would make the branch reachable.
		# if [[ ! -f TEs/temp_${sample}_bound_${super}_${fam}.txt ]] 
		# then
			# nbound=0
			# printf "$sample has $nbound bound TEs\n"
			# printf "getting closest genes for all (unbound) TEs\n"
			# bedtools closest -a TEs/super_${super}_fam_${fam}.bed -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super -v f=$fam -v p=$sample '$13<1000 && $13>-1000 {print s,f,$4,p,"Unbound",$10,$13}' >> TEs/Bound_unbound_genes_complete2.txt
		# elif [[ $nbound == $ntot ]]
		# then
			# nunbound=0
			### NOTE(review): this message prints $nunbound (0) followed by "bound TEs"; "unbound TEs"
			### is presumably what was meant, since every TE is bound in this branch.
			# printf "$sample has $nunbound bound TEs\n"
			# printf "getting closest genes for all (bound) TEs\n"
			# bedtools closest -a TEs/super_${super}_fam_${fam}.bed -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super -v f=$fam -v p=$sample '$13<1000 && $13>-1000 {print s,f,$4,p,"Bound",$10,$13}' >> TEs/Bound_unbound_genes_complete2.txt
		# else
			### Unbound = TEs with no exact (100% reciprocal) match among the bound set.
			# bedtools intersect -v -f 1 -r -wa -a TEs/super_${super}_fam_${fam}.bed -b TEs/temp_${sample}_bound_${super}_${fam}.txt > TEs/temp_${sample}_unbound_${super}_${fam}.txt
			# nunbound=$(wc -l TEs/temp_${sample}_unbound_${super}_${fam}.txt | awk '{print $1}')
			### Sanity check: bound + unbound must add up to the family total.
			# if [[ $((nbound + nunbound)) != $ntot ]]
			# then
				# printf "Problem for ${sample}! $nbound bound + $nunbound unbound is not equal to $ntot total\n"
				### NOTE(review): a bare `exit` returns the status of the preceding printf (0), so a
				### caller cannot detect this failure; `exit 1` would signal it.
				# exit
			# fi	
			# printf "$nbound bound TEs\n"
			# bedtools closest -a TEs/temp_${sample}_bound_${super}_${fam}.txt -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super -v f=$fam -v p=$sample '$13<1000 && $13>-1000 {print s,f,$4,p,"Bound",$10,$13}' >> TEs/Bound_unbound_genes_complete2.txt
			# printf "$nunbound unbound TEs\n"
			# bedtools closest -a TEs/temp_${sample}_unbound_${super}_${fam}.txt -b Genes/all_kind_of_genes.bed -D b | awk -v OFS="\t" -v s=$super -v f=$fam -v p=$sample '$13<1000 && $13>-1000 {print s,f,$4,p,"Unbound",$10,$13}' >> TEs/Bound_unbound_genes_complete2.txt
		# fi
	# done
# done < TEs/all_superfamilies.txt
		
# rm TEs/temp*	
		
###############################################################################################################################################################################
#### To plot deeptools comparing TEs that "might" have an impact on transcription of neighbouring genes and TEs that don't

# for sample in SUVH1 SUVH3
# do
	# printf "$sample start with total TEs: "
	# wc -l TEs/all_TEs.bed
	# rm TEs/${sample}*bound_TEs.*
	# awk -v s=$sample '$4==s && $5=="Bound" {print $3}' TEs/Bound_unbound_genes_complete2.txt | sort | uniq > TEs/${sample}_bound_TEs.txt
	# awk -v s=$sample '$4==s && $5=="Unbound" {print $3}' TEs/Bound_unbound_genes_complete2.txt | sort | uniq > TEs/${sample}_unbound_TEs.txt

	# printf "getting bound TEs: "
	# while read TE
	# do
		# grep $TE TEs/all_TEs.bed >> TEs/${sample}_bound_TEs.bed
	# done < TEs/${sample}_bound_TEs.txt
	# wc -l TEs/${sample}_bound_TEs.bed

	# printf "getting unbound TEs: "
	# while read TE
	# do
		# grep $TE TEs/all_TEs.bed >> TEs/${sample}_unbound_TEs.bed
	# done < TEs/${sample}_unbound_TEs.txt
	# wc -l TEs/${sample}_unbound_TEs.bed

	# printf "getting TEs further than 1kb: "
	# cat TEs/${sample}_bound_TEs.bed TEs/${sample}_unbound_TEs.bed | sort -k1,1n -k2,2n | bedtools intersect -v -f 1 -r -wa -a TEs/all_TEs.bed -b stdin | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' > TEs/${sample}_further1kb_TEs.bed
	# wc -l TEs/${sample}_further1kb_TEs.bed

	# printf "getting influencial TEs, bound and unbound: "
	### `cat -v` renders carriage returns as the two characters "^M", which the second sed removes;
	### the first sed strips double quotes. The cleaned list is read below as "<TE> <Bound|...>" pairs.
	# cat -v TEs/${sample}_TE_close_to_gene_downregulated_in_suvh13.txt | sed 's;";;g' | sed 's;\^M;;g' > TEs/tmp_${sample}_TE_close_to_gene_downregulated_in_suvh13.txt
	### NOTE(review): this glob is not restricted to ${sample}. It also matches "*uninfluencial*" and
	### the TEs/matrix_*_influencial_*.gz, heatmap_*_influencial_* and profile_*_influencial_* outputs
	### written at the end of this loop, so the second sample's iteration deletes the first sample's
	### results. `rm -f TEs/${sample}_*influencial_TEs.bed` would limit it to this sample's BED files.
	# rm TEs/*influencial*
	# while read TE Bound
	# do
		### NOTE(review): `grep $TE` is an unanchored regex match against the whole BED line; if one
		### TE identifier can be a substring of another, extra lines are selected. `grep -wF` would
		### be stricter - confirm against the identifier format in all_TEs.bed.
		# if [[ $Bound == "Bound" ]]
		# then
			# grep $TE TEs/${sample}_bound_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' >> TEs/${sample}_bound_influencial_TEs.bed
		# else
			# grep $TE TEs/${sample}_unbound_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' >> TEs/${sample}_unbound_influencial_TEs.bed
		# fi
	# done < TEs/tmp_${sample}_TE_close_to_gene_downregulated_in_suvh13.txt
	### NOTE(review): both output files are only ever created by the `>>` appends above; if the list
	### contains no "Bound" (or no other) entry, the corresponding file does not exist and the `wc`
	### and `bedtools intersect` calls that follow fail. Creating both files empty first avoids that.
	# wc -l TEs/${sample}_bound_influencial_TEs.bed
	# wc -l TEs/${sample}_unbound_influencial_TEs.bed

	# printf "getting uninfluencial TEs, bound and unbound: "
	# bedtools intersect -v -f 1 -r -wa -a TEs/${sample}_bound_TEs.bed -b TEs/${sample}_bound_influencial_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' > TEs/${sample}_bound_uninfluencial_TEs.bed
	# wc -l TEs/${sample}_bound_uninfluencial_TEs.bed
	# bedtools intersect -v -f 1 -r -wa -a TEs/${sample}_unbound_TEs.bed -b TEs/${sample}_unbound_influencial_TEs.bed | awk -v OFS="\t" '{print $1,$2,$3,$5,".",$6}' > TEs/${sample}_unbound_uninfluencial_TEs.bed	
	# wc -l TEs/${sample}_unbound_uninfluencial_TEs.bed

	# nBI=$(wc -l TEs/${sample}_bound_influencial_TEs.bed | awk '{print $1}')
	# nBU=$(wc -l TEs/${sample}_bound_uninfluencial_TEs.bed | awk '{print $1}')
	# nUI=$(wc -l TEs/${sample}_unbound_influencial_TEs.bed | awk '{print $1}')
	# nUU=$(wc -l TEs/${sample}_unbound_uninfluencial_TEs.bed | awk '{print $1}')
	# nO=$(wc -l TEs/${sample}_further1kb_TEs.bed | awk '{print $1}')
	# ntot=$(wc -l TEs/all_TEs.bed | awk '{print $1}')
	# sum=$((nBI+nBU+nUI+nUU+nO))

	# if [[ $((nBI+nBU+nUI+nUU+nO)) != $ntot ]]
	# then
		# printf "Problem for ${sample}! $nBI + $nBU + $nUI + $nUU + $nO (${sum}) different from $ntot\n"
		# cat TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed | sort -k1,1n -k2,2n | uniq -c | awk '$1>1'
		# exit
	# fi
	# label=$(awk -v a=$nBI -v b=$nBU -v c=$nUI -v d=$nUU -v e=$nO 'BEGIN {print "Bound_influencial("a") Bound_uninfluencial("b") Unbound_influencial("c") Unbound_uninfluencial("d") Further_than1kb("e")"}')

	# printf "computing ChIP matrix for bound and influencial ${sample} TEs\n"
	# computeMatrix scale-regions -R TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${sample}_influencial_chip.gz
	# printf "plotting ChIP heatmap for influencial ${sample} TEs\n"	
	# plotHeatmap -m TEs/matrix_${sample}_influencial_chip.gz -out TEs/heatmap_${sample}_influencial_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "TEs" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 --whatToShow "heatmap and colorbar"
	# printf "plotting chip profile for ${sample} influencial TEs\n"
	# plotProfile -m TEs/matrix_${sample}_influencial_chip.gz -out TEs/profile_${sample}_influencial_chip.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --yAxisLabel "Log2FC (HA ChIP vs Input)" --numPlotsPerRow 4 --perGroup
		
	# printf "computing CG methylation matrix for ${sample} influencial TEs\n"
	# computeMatrix scale-regions -R TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed -S methyl/CG_WT_seedlings.bw methyl/CG_MBD5_MBD6.bw methyl/CG_mbd1_2_4.bw methyl/CG_mbd1_2_5_6.bw methyl/CG_mbd2_5_6.bw methyl/CG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${sample}_influencial_CG.gz
	# printf "plotting CG methylation profile for ${sample} influencial TEs\n"
	# plotProfile -m TEs/matrix_${sample}_influencial_CG.gz -out TEs/profile_${sample}_influencial_CG.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --yAxisLabel "mCG (%)" --numPlotsPerRow 4 --perGroup --yMin 0
		
	# printf "computing CHG methylation matrix for ${sample} influencial TEs\n"
	# computeMatrix scale-regions -R TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed -S methyl/CHG_WT_seedlings.bw methyl/CHG_MBD5_MBD6.bw methyl/CHG_mbd1_2_4.bw methyl/CHG_mbd1_2_5_6.bw methyl/CHG_mbd2_5_6.bw methyl/CHG_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${sample}_influencial_CHG.gz
	# printf "plotting CHG methylation profile for ${sample} influencial TEs\n"
	# plotProfile -m TEs/matrix_${sample}_influencial_CHG.gz -out TEs/profile_${sample}_influencial_CHG.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --yAxisLabel "mCHG (%)" --numPlotsPerRow 4 --perGroup --yMin 0

	# printf "computing CHH methylation matrix for ${sample} influencial TEs\n"
	# computeMatrix scale-regions -R TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed -S methyl/CHH_WT_seedlings.bw methyl/CHH_MBD5_MBD6.bw methyl/CHH_mbd1_2_4.bw methyl/CHH_mbd1_2_5_6.bw methyl/CHH_mbd2_5_6.bw methyl/CHH_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${sample}_influencial_CHH.gz
	# printf "plotting CHH methylation profile for ${sample} influencial TEs\n"
	# plotProfile -m TEs/matrix_${sample}_influencial_CHH.gz -out TEs/profile_${sample}_influencial_CHH.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD5/6 MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --yAxisLabel "mCHH (%)" --numPlotsPerRow 4 --perGroup --yMin 0
		
	# printf "computing rna matrix for ${sample} influencial TEs\n"
	# computeMatrix scale-regions -R TEs/${sample}_bound_influencial_TEs.bed TEs/${sample}_bound_uninfluencial_TEs.bed TEs/${sample}_unbound_influencial_TEs.bed TEs/${sample}_unbound_uninfluencial_TEs.bed TEs/${sample}_further1kb_TEs.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o TEs/matrix_${sample}_influencial_rna.gz
	# printf "plotting rna profile for ${sample} influencial TEs\n"
	# plotProfile -m TEs/matrix_${sample}_influencial_rna.gz -out TEs/profile_${sample}_influencial_rna.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA" --numPlotsPerRow 4 --perGroup
	# plotProfile -m TEs/matrix_${sample}_influencial_rna.gz -out TEs/profile_${sample}_influencial_rna_med.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA" --numPlotsPerRow 4 --perGroup	
# done	
	
###############################################################################################################################################################################

#### For Genes analysis

#### Get methylation at all protein coding genes

# source /home/tstuart/working_data/virtualenvs/env/bin/activate
# python find_methylation_at_bedfiles.py
# deactivate

#### Split into pseudogenes (methylated in mCHG and mCHH (>2%)), GbM genes (>5% mCG) and unmethylated genes

# sed 's/\r//' methyl/mC_protein_coding_gene_WT_seedlings_CG.tsv | sort -k1,1 > methyl/interCG_protein_coding_gene.tsv
# sed 's/\r//' methyl/mC_protein_coding_gene_WT_seedlings_CHG.tsv | sort -k1,1 > methyl/interCHG_protein_coding_gene.tsv
# sed 's/\r//' methyl/mC_protein_coding_gene_WT_seedlings_CHH.tsv | sort -k1,1 > methyl/interCHH_protein_coding_gene.tsv
# join methyl/interCG_protein_coding_gene.tsv methyl/interCHG_protein_coding_gene.tsv | join - methyl/interCHH_protein_coding_gene.tsv | awk -v OFS="\t" '{if ($5+$8>2) print $1 > "annotations/inter_mC_pseudogenes.txt"; else if ($2>5) print $1 > "annotations/inter_mC_GbM_genes.txt"; else print $1 > "annotations/inter_mC_unmethylated.txt"; }'

# wc -l annotations/all_protein_coding_gene.bed > annotations/stats_genes_methylation.txt
# sort -k4,4 annotations/all_protein_coding_gene.bed > annotations/inter_allgenes.bed

# for type in GbM_genes pseudogenes unmethylated
# do
	#### get number of genes in each category
	# join -1 1 -2 4 annotations/inter_mC_${type}.txt annotations/inter_allgenes.bed | awk -v OFS="\t" '{print $2,$3,$4,$1,$5,$6}' > annotations/mC_${type}.bed
	# wc -l annotations/mC_${type}.bed >> annotations/stats_genes_methylation.txt
	# cat annotations/stats_genes_methylation.txt
# done

############### plotting heatmaps

# for gene in GbM unmethylated
# do
	# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT
	# do
		# bedtools intersect -wa -a Genes/${gene}.bed -b manuscript/peaks_${sample}.bed > Genes/chip_${sample}_bound_${gene}.txt
	# done
	# cat Genes/chip_*_bound_${gene}.txt | sort -k1,1n -k2,2n | uniq > Genes/bound_${gene}.bed
	# nbound=$(wc -l Genes/bound_${gene}.bed | awk '{print $1}')
	# bedtools intersect -v -f 1 -r -wa -a Genes/${gene}.bed -b Genes/bound_${gene}.bed > Genes/unbound_${gene}.bed
	# nunbound=$(wc -l Genes/unbound_${gene}.bed | awk '{print $1}')
	# ntot=$(wc -l Genes/${gene}.bed | awk '{print $1}')
	# if [[ $((nbound + nunbound)) != $ntot ]]
	# then
		# printf "Problem! $nbound bound + $nunbound unbound is not equal to $ntot total\nMissing lines are:\n"
		# cat Genes/bound_${gene}.bed Genes/unbound_${gene}.bed | sort -k1,1n -k2,2n | comm - annotations/*${gene}*.bed | head
		# exit
	# fi
	# if [[ $gene == "GbM" ]]
	# then
		# label=$(grep $gene annotations/stats_genes_methylation.txt | awk -v g=$gene '{print g"_genes("$1")"}' )
		# region=$(printf "Genes/"$gene".bed")
	# else
		# label=$(printf "Bound(${nbound}) Unbound(${nunbound})")
		# region=$(printf "Genes/bound_"$gene".bed Genes/unbound_"$gene".bed")
	# fi
	###### reference point
	# printf "computing chip matrix for $gene genes in $region\n"
	# computeMatrix reference-point --referencePoint "TSS" -R $region -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 1000 -a 8000 -p 10 -o Genes/matrix_${gene}_chip.gz

	# printf "plotting chip heatmap for $gene genes\n"
	# plotHeatmap -m Genes/matrix_${gene}_chip.gz -out Genes/heatmap_${gene}_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1.5 --zMax 1.5 --whatToShow "heatmap and colorbar"
	
	### For each methylation context: computes a TSS-anchored matrix over $region from the five
	### methyl/${context}_*.bw tracks, then plots a heatmap plus mean and median profiles.
	# for context in CG CHG CHH
	# do
		### NOTE(review): the printf FORMAT string contains a bare "%" ("(%)"); "%)" is not a valid
		### conversion, so bash's printf reports "invalid format character" and ylab does not receive
		### the intended label ("%%" would print a literal percent sign). In addition, the embedded
		### literal double quotes do not protect the value: `--yAxisLabel $ylab` below is unquoted, so
		### the label is word-split at the space. Suggested replacement:
		###     ylab="m${context} (%)"      and      --yAxisLabel "$ylab"
		# ylab=$(printf "\"m"${context}" (%)\"")
		# printf "computing $context methylation matrix for $gene genes in $region\n"
		# computeMatrix reference-point --referencePoint "TSS" -R $region -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 100 -b 1000 -a 8000 -p 10 -o Genes/matrix_${gene}_${context}.gz
		
		# printf "plotting $context methylation heatmap for $gene genes\n"
		# plotHeatmap -m Genes/matrix_${gene}_${context}.gz -out Genes/heatmap_${gene}_${context}.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
		# printf "plotting $context methylation profiles for $gene genes\n"
		# plotProfile -m Genes/matrix_${gene}_${context}.gz -out Genes/profile_${gene}_${context}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --refPointLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0
		# plotProfile -m Genes/matrix_${gene}_${context}.gz -out Genes/profile_${gene}_${context}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --refPointLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0
	# done
	
	# printf "computing rna matrix for $gene genes in $region\n"
	# computeMatrix reference-point --referencePoint "TSS" -R $region -S rna/mbd1_2_4_log2.bw rna/mbd1_2_5_6_log2.bw rna/mbd2_5_6_log2.bw rna/suvh1_3_log2.bw -bs 50 -b 1000 -a 8000 -p 10 -o Genes/matrix_${gene}_rna.gz

	# printf "plotting rna heatmap for $gene genes\n"
	# plotHeatmap -m Genes/matrix_${gene}_rna.gz -out Genes/heatmap_${gene}_rna.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting rna profiles for $gene genes\n"
	# plotProfile -m Genes/matrix_${gene}_rna.gz -out Genes/profile_${gene}_rna_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --refPointLabel "TSS" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
	# plotProfile -m Genes/matrix_${gene}_rna.gz -out Genes/profile_${gene}_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --refPointLabel "TSS" --samplesLabel MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
	
	### NOTE(review): no `done` closing this `for mark` loop is visible between its last plotProfile
	### call and the "scale regions" section that follows, although that section is indented at the
	### gene-loop level. If this block is re-enabled, confirm the loop is closed before that section.
	# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
	# do
		# printf "computing $mark matrix for $gene genes in $region\n"
		# computeMatrix reference-point --referencePoint "TSS" -R $region -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 50 -b 1000 -a 8000 -p 10 -o Genes/matrix_${gene}_${mark}.gz

		# printf "plotting $mark heatmap for $gene genes\n"
		# plotHeatmap -m Genes/matrix_${gene}_${mark}.gz -out Genes/heatmap_${gene}_${mark}.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
		# printf "plotting $mark profiles for $gene genes\n"
		# plotProfile -m Genes/matrix_${gene}_${mark}.gz -out Genes/profile_${gene}_${mark}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --refPointLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
		# plotProfile -m Genes/matrix_${gene}_${mark}.gz -out Genes/profile_${gene}_${mark}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --refPointLabel "TSS" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
		
	# done
	
	##### scale regions
	# printf "computing chip matrix for $gene genes in $region\n"
	# computeMatrix scale-regions -R $region -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_${gene}_chip_reg.gz

	# printf "plotting chip heatmap for $gene genes\n"
	# plotHeatmap -m Genes/matrix_${gene}_chip_reg.gz -out Genes/heatmap_${gene}_reg_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1.5 --zMax 1.5 --whatToShow "heatmap and colorbar"
	
	# for context in CG CHG CHH
	# do
		# ylab=$(printf "\"m${context}(%%)\"")
		# printf "computing $context methylation matrix for $gene genes in $region\n"
		# computeMatrix scale-regions -R $region -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 100 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_${gene}_${context}_reg.gz
		
		# printf "plotting $context methylation heatmap for $gene genes\n"
		# plotHeatmap -m Genes/matrix_${gene}_${context}_reg.gz -out Genes/heatmap_${gene}_reg_${context}.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
		# printf "plotting $context methylation profiles for $gene genes\n"
		# plotProfile -m Genes/matrix_${gene}_${context}_reg.gz -out Genes/profile_${gene}_reg_${context}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0
		# plotProfile -m Genes/matrix_${gene}_${context}_reg.gz -out Genes/profile_${gene}_reg_${context}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0
	# done
	
	# printf "computing rna matrix for $gene genes in $region\n"
	# computeMatrix scale-regions -R $region -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_${gene}_rna_reg.gz

	# printf "plotting rna heatmap for $gene genes\n"
	# plotHeatmap -m Genes/matrix_${gene}_rna_reg.gz -out Genes/heatmap_${gene}_reg_rna.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting rna profiles for $gene genes\n"
	# plotProfile -m Genes/matrix_${gene}_rna_reg.gz -out Genes/profile_${gene}_reg_rna_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates" --numPlotsPerRow 4 --perGroup
	# plotProfile -m Genes/matrix_${gene}_rna_reg.gz -out Genes/profile_${gene}_reg_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates" --numPlotsPerRow 4 --perGroup
	
	# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
	# do
		# printf "computing $mark matrix for $gene genes in $region\n"
		# computeMatrix scale-regions -R $region -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_${gene}_${mark}_reg.gz

		# printf "plotting $mark heatmap for $gene genes\n"
		# plotHeatmap -m Genes/matrix_${gene}_${mark}_reg.gz -out Genes/heatmap_${gene}_reg_${mark}.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel $label --whatToShow "heatmap and colorbar"
		# printf "plotting $mark profiles for $gene genes\n"
		# plotProfile -m Genes/matrix_${gene}_${mark}_reg.gz -out Genes/profile_${gene}_reg_${mark}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
		# plotProfile -m Genes/matrix_${gene}_${mark}_reg.gz -out Genes/profile_${gene}_reg_${mark}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
	# done
	
	# printf "computing CG density matrix for $gene genes in $region\n" 
	# computeMatrix scale-regions -R $region -S annotations/CG_density.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_${gene}_density_reg.gz --missingDataAsZero	
	# printf "plotting CG density heatmap for $gene genes\n"
	# plotHeatmap -m Genes/matrix_${gene}_density_reg.gz -out Genes/heatmap_${gene}_reg_density.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod nearest --startLabel "TSS" --endLabel "TES" --xAxisLabel "Genes" --samplesLabel CG_density --regionsLabel $label --whatToShow "heatmap and colorbar"
	# printf "plotting CG density profiles for $gene genes\n"
	# plotProfile -m Genes/matrix_${gene}_density_reg.gz -out Genes/profile_${gene}_reg_density_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup
	# plotProfile -m Genes/matrix_${gene}_density_reg.gz -out Genes/profile_${gene}_reg_density_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup
# done	

############### plotting heatmaps for MBD1 bound genes

# bedtools intersect -wa -a Genes/unmethylated.bed -b manuscript/peaks_MBD1.bed | uniq > Genes/MBD1_bound_unmethylated.bed
# nbound=$(wc -l Genes/MBD1_bound_unmethylated.bed | awk '{print $1}')
# bedtools intersect -v -f 1 -r -wa -a Genes/unmethylated.bed -b Genes/MBD1_bound_unmethylated.bed > Genes/MBD1_unbound_unmethylated.bed
# nunbound=$(wc -l Genes/MBD1_unbound_unmethylated.bed | awk '{print $1}')
# ntot=$(wc -l Genes/unmethylated.bed | awk '{print $1}')
# if [[ $((nbound + nunbound)) != $ntot ]]
# then
	# printf "Problem! $nbound bound + $nunbound unbound is not equal to $ntot total\n"
	# exit 1
# else
	# printf "No problem, $nbound bound + $nunbound unbound is equal to $ntot total\n"
# fi
# label=$(printf "Bound(${nbound}) Unbound(${nunbound})")
# region=$(printf "Genes/MBD1_bound_unmethylated.bed Genes/MBD1_unbound_unmethylated.bed")

# printf "computing chip matrix for MBD1_gene in $region\n"
# computeMatrix reference-point --referencePoint "TSS" -R $region -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 50 -b 1000 -a 8000 -p 10 -o Genes/matrix_MBD1_gene_chip.gz

# printf "plotting chip heatmap for MBD1_gene\n"
# plotHeatmap -m Genes/matrix_MBD1_gene_chip.gz -out Genes/heatmap_MBD1_gene_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1.5 --zMax 1.5 --whatToShow "heatmap and colorbar"
	
# for context in CG CHG CHH
# do
	# ylab=$(printf "\"m${context}(%%)\"")
	# printf "computing $context methylation matrix for MBD1_gene in $region\n"
	# computeMatrix scale-regions -R $region -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 100 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_MBD1_gene_${context}.gz
	# printf "plotting $context methylation profiles for MBD1_gene\n"
	# plotProfile -m Genes/matrix_MBD1_gene_${context}.gz -out Genes/profile_MBD1_gene_${context}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0
# done
	
# printf "computing rna matrix for MBD1_gene in $region\n"
# computeMatrix scale-regions -R $region -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_MBD1_gene_rna.gz
# printf "plotting rna profiles for MBD1_gene\n"
# plotProfile -m Genes/matrix_MBD1_gene_rna.gz -out Genes/profile_MBD1_gene_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates" --numPlotsPerRow 4 --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing $mark matrix for MBD1_gene in $region\n"
	# computeMatrix scale-regions -R $region -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 50 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_MBD1_gene_${mark}.gz
	# printf "plotting $mark profiles for MBD1_gene\n"
	# plotProfile -m Genes/matrix_MBD1_gene_${mark}.gz -out Genes/profile_MBD1_gene_${mark}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "log2FC" --numPlotsPerRow 4 --perGroup
# done
	
# printf "computing CG density matrix for MBD1_gene in $region\n" 
# computeMatrix scale-regions -R $region -S annotations/CG_density.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/matrix_MBD1_gene_density.gz --missingDataAsZero	
# printf "plotting CG density profiles for MBD1_gene\n"
# plotProfile -m Genes/matrix_MBD1_gene_density.gz -out Genes/profile_MBD1_gene_density_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup

############### plotting heatmaps for GbM + MBD1_bound genes

# bedtools intersect -wa -a Genes/unmethylated.bed -b manuscript/peaks_MBD1.bed | sort -k1,1n -k2,2n | uniq > Genes/MBD1_bound_unmethylated.bed
# bedtools intersect -v -f 1 -r -wa -a Genes/unmethylated.bed -b Genes/MBD1_bound_unmethylated.bed > Genes/MBD1_unbound_unmethylated.bed

# nbound=$(wc -l Genes/MBD1_bound_unmethylated.bed | awk '{print $1}')
# nunbound=$(wc -l Genes/MBD1_unbound_unmethylated.bed | awk '{print $1}')
# nboundGbM=$(wc -l Genes/bound_GbM.bed | awk '{print $1}')
# nunboundGbM=$(wc -l Genes/unbound_GbM.bed | awk '{print $1}')

# label=$(awk -v a=$nboundGbM -v b=$nunboundGbM -v c=$nbound -v d=$nunbound 'BEGIN {print "Bound_GbM_genes("a") Unbound_GbM_genes("b") Bound_by_MBD1("c") Unbound("d")"}')
# region=$(printf "Genes/bound_GbM.bed Genes/unbound_GbM.bed Genes/MBD1_bound_unmethylated.bed Genes/MBD1_unbound_unmethylated.bed")
# colors=("#000000" "#70B6EC" "#9E61CB" "#4F4FCC" "#E8824E")

# # printf "Type\tBound\tAGI\n" > Genes/Clusters_AGI.txt
# # awk -v OFS="\t" '{print "GbM","Bound",$4}' Genes/bound_GbM.bed >> Genes/Clusters_AGI.txt
# # awk -v OFS="\t" '{print "GbM","Unbound",$4}' Genes/unbound_GbM.bed >> Genes/Clusters_AGI.txt
# # awk -v OFS="\t" '{print "Unmethylated","Bound",$4}' Genes/MBD1_bound_unmethylated.bed >> Genes/Clusters_AGI.txt
# # awk -v OFS="\t" '{print "Unmethylated","Unbound",$4}' Genes/MBD1_unbound_unmethylated.bed >> Genes/Clusters_AGI.txt


# printf "computing chip matrix for genes in $region\n"
# computeMatrix reference-point --referencePoint "TSS" -R $region -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 8000 -p 10 -o Genes/aaa_matrix_chip.gz
# printf "plotting chip heatmap for genes\n"
# plotHeatmap -m Genes/aaa_matrix_chip.gz -out Genes/aaa_heatmap_chip.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --refPointLabel "TSS" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel $label --zMin -1 --zMax 1 --whatToShow "heatmap and colorbar"

# printf "computing region chip matrix for genes in $region\n"
# computeMatrix scale-regions -R $region -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/aaa_matrix_chip_reg.gz
# printf "plotting chip profile for genes\n"
# plotProfile -m Genes/aaa_matrix_chip_reg.gz -out Genes/aaa_profile_chip.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "ChIP (Log2FCvsInput)" --numPlotsPerRow 4 --perGroup
# plotProfile -m Genes/aaa_matrix_chip_reg.gz -out Genes/aaa_profile_chip2.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --yAxisLabel "ChIP (Log2FCvsInput)" --numPlotsPerRow 4

# for context in CG CHG CHH
# do
	# case "$context" in
		# CG) 	ylab="mCG"
				# max=100;;
		# CHG)	ylab="mCHG"
				# max=50;;
		# CHH)	ylab="mCHH"
				# max=20;;
	# esac
	# printf "computing $context methylation matrix for genes in $region\n"
	# computeMatrix scale-regions -R $region -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/aaa_matrix_${context}.gz
	# printf "plotting $context methylation profiles for genes\n"
	# plotProfile -m Genes/aaa_matrix_${context}.gz -out Genes/aaa_profile_${context}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD2/5/6 MBD1/2/5/6 SUVH1/3 --yAxisLabel $ylab --numPlotsPerRow 4 --perGroup --yMin 0 --yMax $max --colors ${colors[@]}
# done
	
# printf "computing rna matrix for genes in $region\n"
# computeMatrix scale-regions -R $region -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/aaa_matrix_rna.gz
# printf "plotting rna profiles for genes\n"
# plotProfile -m Genes/aaa_matrix_rna.gz -out Genes/aaa_profile_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (median)" --numPlotsPerRow 4 --perGroup
# printf "plotting rna profiles for genes\n"
# plotProfile -m Genes/aaa_matrix_rna.gz -out Genes/aaa_profile_rna_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (mean)" --numPlotsPerRow 4 --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing $mark matrix for genes in $region\n"
	# computeMatrix scale-regions -R $region -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/aaa_matrix_${mark}.gz
	# printf "plotting $mark profiles for genes\n"
	# plotProfile -m Genes/aaa_matrix_${mark}.gz -out Genes/aaa_profile_${mark}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark (log2FCvsInput)" --numPlotsPerRow 4 --perGroup
# done
	
# printf "computing CG density matrix for genes in $region\n" 
# computeMatrix scale-regions -R $region -S annotations/CG_density.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/aaa_matrix_density.gz --missingDataAsZero	
# printf "plotting CG density profiles for genes\n"
# plotProfile -m Genes/aaa_matrix_density.gz -out Genes/aaa_profile_density_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel CG_density --yAxisLabel "CG density" --numPlotsPerRow 4 --perGroup

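### For H2AKub mark
# NOTE(review): $mark is never assigned in this section. If the "for mark in ..." loop above has run, it still holds H3K9me2, so the messages and y-axis labels below would be mislabelled; set mark="H2AKub" before running.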

# printf "computing $mark matrix for genes in $region\n"
# samplelabel=$(ls -f histones/*H2AKub.bw | sed 's;histones/;;' | sed "s;_H2AKub.bw;;" | awk -v ORS=" " '{print $0}' )
# printf "$samplelabel\n"
# computeMatrix scale-regions -R $region -S histones/*H2AKub.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/H2AKub_matrix.gz
# printf "plotting $mark profiles for genes\n"
# plotProfile -m Genes/H2AKub_matrix.gz -out Genes/H2AKub_profile_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel $samplelabel --yAxisLabel "$mark (log2FCvsH3)" --numPlotsPerRow 4 --perGroup
# printf "plotting $mark profiles for genes\n"
# plotProfile -m Genes/H2AKub_matrix.gz -out Genes/H2AKub_profile_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel $samplelabel --yAxisLabel "$mark (log2FCvsH3)" --numPlotsPerRow 4 --perGroup

### For histone variants from open data

# printf "computing histones matrix for genes in $region\n"
# samplelabel=$(ls -f histones/Open_data/deeptools/H*.bw | sed "s;histones/Open_data/deeptools/;;" | sed "s;.bw;;" | awk -v ORS=" " '{print $0}' )
# # printf "$samplelabel\n"
# # computeMatrix scale-regions -R $region -S histones/Open_data/deeptools/H*.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/Histone_matrix.gz
# printf "plotting histones profiles for genes\n"
# plotProfile -m Genes/Histone_matrix.gz -out Genes/Histone_profile_group.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel $samplelabel --yAxisLabel "log2FC vs Input" --numPlotsPerRow 4 --perGroup
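# NOTE(review): this message reuses $mark, which is not set in this section, and the output below is named Histone_profile_mean.pdf although it is drawn with --averageType 'median' -- confirm which is intended.
# printf "plotting $mark profiles for genes\n"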
# plotProfile -m Genes/Histone_matrix.gz -out Genes/Histone_profile_mean.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel $samplelabel --yAxisLabel "log2FC vs Input" --numPlotsPerRow 4

#### To check RNA, ChIP, histone mark and DNA methylation profiles at DEGs (up / down / others for each mutant)

# mutants=(WT mbd1_2_4 mbd2_5_6 mbd1_2_5_6 suvh1_3)
# mutlabels=(WT WT WT mbd1/2/4 mbd1/2/4 mbd1/2/4 mbd2/5/6 mbd2/5/6 mbd2/5/6 mbd1/2/5/6 mbd1/2/5/6 mbd1/2/5/6 SUVH1/3 SUVH1/3 SUVH1/3)
# colors=("#000000" "#000000" "#000000" "#70B6EC" "#70B6EC" "#70B6EC" "#9E61CB" "#9E61CB" "#9E61CB" "#4F4FCC" "#4F4FCC" "#4F4FCC" "#E8824E" "#E8824E" "#E8824E")
# mutantschip=(WT MBD1_2_4 MBD2_5_6 MBD1_2_5_6 SUVH1_3)
# mutlabelschip=(WT mbd1/2/4 mbd2/5/6 mbd1/2/5/6 suvh1/3)
# colorschip=("#000000" "#70B6EC" "#9E61CB" "#4F4FCC" "#E8824E")

# len=${#mutants[@]}
# rm manuscript/deeptools/*list*txt
# for (( i=0; i<$len; i++ )) 
# do 
	# printf "rna/${mutants[$i]}_a.bw rna/${mutants[$i]}_b.bw rna/${mutants[$i]}_c.bw " >> manuscript/deeptools/DEG_mutantlist.txt
	# if [[ ${mutants[$i]} != "WT" ]]
	# then
		# printf "rna/${mutants[$i]}_up.bed rna/${mutants[$i]}_down.bed rna/${mutants[$i]}_others.bed " >> manuscript/deeptools/DEG_regionlist.txt
		# na=$(wc -l rna/${mutants[$i]}_up.bed | awk '{print $1}')
		# nb=$(wc -l rna/${mutants[$i]}_down.bed | awk '{print $1}')
		# nc=$(wc -l rna/${mutants[$i]}_others.bed | awk '{print $1}')
		# printf "${mutants[$i]}_up(${na}) ${mutants[$i]}_down(${nb}) ${mutants[$i]}_others(${nc}) " >> manuscript/deeptools/DEG_regionlabellist.txt
	# fi
# done
# mutantlist=$(cat manuscript/deeptools/DEG_mutantlist.txt)
# regionlist=$(cat manuscript/deeptools/DEG_regionlist.txt)
# regionlabel=$(cat manuscript/deeptools/DEG_regionlabellist.txt)

# printf "computing rna matrix for DEGs\nmutantlabels: ${mutlabels[@]}\nregionlist: ${regionlabel}\n"
# computeMatrix scale-regions -R ${regionlist} -S ${mutantlist} -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/DEG_rna.gz
# printf "plotting rna profile for DEGs\n"
# plotProfile -m manuscript/deeptools/DEG_rna.gz -out manuscript/plots/DEG_profile_rna_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --colors ${colors[@]} --samplesLabel ${mutlabels[@]} --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
# plotProfile -m manuscript/deeptools/DEG_rna.gz -out manuscript/plots/DEG_profile_rna_median.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES" --colors ${colors[@]} --samplesLabel ${mutlabels[@]} --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
	
# printf "computing chip matrix for DEGs\nmutantlabels: MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT\nregionlist: ${regionlabel}\n"
# computeMatrix scale-regions -R ${regionlist} -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/DEG_chip.gz
# printf "plotting chip profile for DEGs\n"
# plotProfile -m manuscript/deeptools/DEG_chip.gz -out manuscript/plots/DEG_profile_chip_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel ${regionlabel} --colors ${colors[@]} --numPlotsPerRow 3
# plotProfile -m manuscript/deeptools/DEG_chip.gz -out manuscript/plots/DEG_profile_chip_mean2.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
# rm manuscript/deeptools/*list*txt
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# for (( i=0; i<$len; i++ )) 
	# do 
		# printf "histones/${mutantschip[$i]}_${mark}.bw " >> manuscript/deeptools/DEG_mutantlist_${mark}.txt
	# done
	# mutantlist=$(cat manuscript/deeptools/DEG_mutantlist_${mark}.txt)
	# # printf "computing $mark matrix for DEGs\nmutantlist: ${mutlabels[@]}\nregionlist: ${regionlabel}\n"
	# # computeMatrix scale-regions -R ${regionlist} -S ${mutantlist} -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/DEG_${mark}.gz
	# printf "plotting $mark profile for DEGs\n"
	# plotProfile -m manuscript/deeptools/DEG_${mark}.gz -out manuscript/plots/DEG_profile_${mark}_median.pdf --plotType 'lines' --averageType 'median' --startLabel "TSS" --endLabel "TES"  --colors ${colorschip[@]} --samplesLabel ${mutlabelschip[@]} --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
	# plotProfile -m manuscript/deeptools/DEG_${mark}.gz -out manuscript/plots/DEG_profile_${mark}_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES"  --colors ${colorschip[@]} --samplesLabel ${mutlabelschip[@]} --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
# done

# mutantmc=(WT_seedlings mbd1_2_4 mbd2_5_6 mbd1_2_5_6 SUVH1_SUVH3)
# labelmc=(WT MBD1/2/4 MBD2/5/6 MBD1/2/5/6 SUVH1/3)
# colorsmc=("#000000" "#70B6EC" "#9E61CB" "#4F4FCC" "#E8824E")
# lenmc=${#mutantmc[@]}
# for context in CG CHG CHH
# do
	# for (( i=0; i<$lenmc; i++ )) 
	# do 
		# printf "methyl/${context}_${mutantmc[$i]}.bw " >> manuscript/deeptools/DEG_mutantlist_${context}.txt
	# done
	# mutantlist=$(cat manuscript/deeptools/DEG_mutantlist_${context}.txt)
	# printf "computing $context methylation matrix for DEGs\nmutantlabels: ${labelmc[@]}\nmutantlist: ${mutantlist}\nregionlist: ${regionlabel}\n"
	# computeMatrix scale-regions -R ${regionlist} -S ${mutantlist} -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/DEG_m${context}.gz
	# printf "plotting $context methylation profile for DEGs\n"
	# plotProfile -m manuscript/deeptools/DEG_m${context}.gz -out manuscript/plots/DEG_profile_${context}_mean.pdf --plotType 'lines' --averageType 'mean' --startLabel "TSS" --endLabel "TES"  --colors ${colorsmc[@]} --samplesLabel ${labelmc[@]} --regionsLabel ${regionlabel} --perGroup --numPlotsPerRow 3
# done

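### Create a table of DEGs: one row per protein-coding gene, one 0/1 column per mutant and direction (up/down/others)
# NOTE(review): the header written to DEG_table_mutants.txt just below is overwritten by the final "paste ... >" at the end of this section.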
# printf "AGI\tSample\tDirection\tGroup\n" > manuscript/DEG_table_mutants.txt
# mutants=(mbd1_2_4 mbd2_5_6 mbd1_2_5_6 suvh1_3)
# len=${#mutants[@]}
# for (( i=0; i<$len; i++ )) 
# do 
	# printf "${mutants[$i]}\n"
	# for type in up down others
	# do
		# printf "${mutants[$i]} $type\n"
		# printf "${mutants[$i]}_${type}\n" > manuscript/col_${mutants[$i]}_${type}.txt
		# while read chr start end AGI dot strand
		# do
			# pres=$(grep $AGI rna/${mutants[$i]}_${type}.bed)
			# if [[ $pres == "" ]]
			# then 
				# printf "0\n" >> manuscript/col_${mutants[$i]}_${type}.txt
			# else 
				# printf "1\n" >> manuscript/col_${mutants[$i]}_${type}.txt
			# fi
		# done < annotations/all_protein_coding_gene.bed
		# head manuscript/col_${mutants[$i]}_${type}.txt
	# done
# done
# awk 'BEGIN {print "AGI"} {print $4}' annotations/all_protein_coding_gene.bed > manuscript/col_AGI.txt
# paste manuscript/col_*.txt > manuscript/DEG_table_mutants.txt

################ Order genes based on H2AKub signal and look at ChIP signals

# printf "computing H2AKub ordered matrix\n"
# computeMatrix scale-regions -R annotations/all_protein_coding_gene.bed -S histones/WT_H2AKub.bw histones/MBD1_2_5_6_H2AKub.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o manuscript/deeptools/H2AKub_order.gz
# printf "plotting heatmap for H2AKub\n"
# plotHeatmap -m manuscript/deeptools/H2AKub_order.gz -out manuscript/plots/heatmap_H2AKub_ordered_k2.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 1 --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel H2AKub_WT H2AKub_mbd1/2/5/6 MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_H2AKub_k2.bed

# printf "computing pass2 H2AKub ordered matrix\n"
# computeMatrix reference-point --referencePoint "TSS" -R manuscript/deeptools/regions_H2AKub_k2.bed -S histones/WT_H2AKub.bw histones/MBD1_2_4_H2AKub.bw histones/MBD1_2_5_6_H2AKub.bw histones/MBD2_5_6_H2AKub.bw histones/SUVH1_3_H2AKub.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/H2AKub_order_pass2_all.gz
# printf "plotting pass2 heatmap for H2AKub\n"
# plotHeatmap -m manuscript/deeptools/H2AKub_order_pass2_all.gz -out manuscript/plots/heatmap_H2AKub_ordered_pass2_all.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel "H2AKub_WT" "H2AKub_mbd1/2/4" "H2AKub_mbd1/2/5/6" "H2AKub_mbd2/5/6" "H2AKub_suvh1/3" "MBD1" "MBD2" "MBD4" "MBD5" "MBD6" "SUVH1" "SUVH3" "WT"

# printf "computing H2AKub ordered matrix\n"
# computeMatrix reference-point --referencePoint "TSS" -R Genes/bound_GbM.bed Genes/unbound_GbM.bed Genes/bound_unmethylated.bed Genes/unbound_unmethylated.bed -S histones/WT_H2AKub.bw histones/MBD1_2_5_6_H2AKub.bw manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/H2AKub_order_grouped.gz
# printf "plotting heatmap for H2AKub\n"
# plotHeatmap -m manuscript/deeptools/H2AKub_order_grouped.gz -out manuscript/plots/heatmap_H2AKub_ordered_grouped.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel H2AKub_WT H2AKub_mbd1/2/5/6 MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT

### To split all the annotations into clusters

# printf "computing matrix\n"
# computeMatrix reference-point --referencePoint "TSS" -R annotations/all_annotations.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 6000 -p 10 -o manuscript/deeptools/matrix_all_annotations.gz
# printf "plotting k2 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k2.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k2.bed
# printf "plotting k3 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k3.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 3 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k3.bed
# printf "plotting k4 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k4.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 4 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k4.bed
# printf "plotting k5 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k5.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 5 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k5.bed
# printf "plotting k8 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k8.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 8 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k8.bed
# printf "plotting k10 heatmap\n"
# plotHeatmap -m manuscript/deeptools/matrix_all_annotations.gz -out manuscript/plots/heatmap_all_annotations_k10.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --kmeans 10 --outFileSortedRegions manuscript/deeptools/regions_all_annotations_k10.bed

################################################################################################################################################################################
# ###### Find motifs with Homer

### each candidate's peaks

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# findMotifsGenome.pl manuscript/peaks_${sample}.bed tair10 homer/${sample}/ -size 200 -len 5,6,8,10 -p 10 -S 10
# done

### specific regions

# findMotifsGenome.pl manuscript/deeptools/grouped_v2_common.bed tair10 homer/common_peaks/ -size 200 -len 5,6,8,10 -p 10 -S 10

### NOTE(review): this loop is unfinished -- the awk action is still empty ('{}'), so it would write an empty Cluster_N.bed;
### the per-cluster filter on regions_reorganized.txt has to be filled in before re-enabling.
# for cluster in 1 2 3 4 5
# do
	# awk -v OFS="\t" '{}' manuscript/deeptools/regions_reorganized.txt > manuscript/deeptools/Cluster_${cluster}.bed
	# findMotifsGenome.pl manuscript/deeptools/Cluster_${cluster}.bed tair10 homer/cluster_${cluster}/ -size 200 -len 5,6,8,10 -p 10 -S 10
# done

### Unmethylated gene bound by MBD1

# findMotifsGenome.pl Genes/MBD1_bound_unmethylated.bed tair10 homer/MBD1_bound/ -size 200 -len 5,6,8,10 -p 10 -S 10
# findMotifsGenome.pl Genes/MBD1_bound_unmethylated.bed tair10 homer/MBD1_boundvsunbound/ -bg Genes/MBD1_unbound_unmethylated.bed -size 200 -len 5,6,8,10 -p 10 -S 10

################################################################################################################################################################################
################# V2
################################################################################################################################################################################

### To order regions based on difference between mbd1/2/5/6 and WT first, then plot heatmaps of other marks

# nbound=$(wc -l Genes/MBD1_bound_unmethylated.bed | awk '{print $1}')
# nunbound=$(wc -l Genes/MBD1_unbound_unmethylated.bed | awk '{print $1}')
# nboundGbM=$(wc -l Genes/bound_GbM.bed | awk '{print $1}')
# nunboundGbM=$(wc -l Genes/unbound_GbM.bed | awk '{print $1}')

# label=$(awk -v a=$nboundGbM -v b=$nunboundGbM -v c=$nbound -v d=$nunbound 'BEGIN {print "Bound_GbM_genes("a") Unbound_GbM_genes("b") Bound_by_MBD1("c") Unbound("d")"}')
# region=$(printf "Genes/bound_GbM.bed Genes/unbound_GbM.bed Genes/MBD1_bound_unmethylated.bed Genes/MBD1_unbound_unmethylated.bed")

# ## pass 1
# printf "computing H2AKub ordered matrix\n"
# computeMatrix scale-regions -R $region -S histones/MBD1_2_5_6_H2AKub_minusWT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_H2AKubdiff.gz
# printf "ordering regions\n"
# plotHeatmap -m Genes/2a_matrix_H2AKubdiff.gz -out Genes/2a_heatmap_H2AKubdiff_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --outFileSortedRegions Genes/2a_regions_H2AKubdiff_k2.bed --kmeans 2

## pass 2 

# for reg in bound_GbM unbound_GbM MBD1_bound_unmethylated MBD1_unbound_unmethylated
# do
	# rm Genes/${reg}_H2AKub_*.bed
	# bedtools intersect -a Genes/2a_regions_H2AKubdiff_k2.bed -b Genes/${reg}.bed -wa -f 1 -r | awk -v OFS="\t" -v r=$reg '{if ($13 == "cluster_1") printf $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\n" >> "Genes/"r"_H2AKub_up.bed"; else printf $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\n" >> "Genes/"r"_H2AKub_down.bed"}'
	
	# nup=$(wc -l Genes/${reg}_H2AKub_up.bed | awk '{print $1}')
	# ndown=$(wc -l Genes/${reg}_H2AKub_down.bed | awk '{print $1}')
	# label=$(awk -v a=$nup -v b=$ndown -v r=$reg 'BEGIN {print r"_up("a") "r"_down("b")"}')
	# region=$(printf "Genes/${reg}_H2AKub_up.bed Genes/${reg}_H2AKub_down.bed")
	# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
	# do
		# printf "computing matrix for $mark in $reg\n"
		# computeMatrix scale-regions -R $region -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_${mark}_${reg}_diffH2AKub.gz
		# printf "plotting heatmap for $mark in $reg\n"
		# plotHeatmap -m Genes/2a_matrix_${mark}_${reg}_diffH2AKub.gz -out Genes/2a_heatmap_${mark}_${reg}_diffH2AKubsorted.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3
	# done
# done

###### Only looking at MBD1 bound genes that lose H2AKub in mutant

# # pass 1
# printf "computing H2AKub ordered matrix\n"
# computeMatrix scale-regions -R Genes/MBD1_bound_unmethylated.bed -S histones/MBD1_2_5_6_H2AKub_minusWT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_H2AKubdiff_MBD1.gz
# printf "ordering regions\n"
# plotHeatmap -m Genes/2a_matrix_H2AKubdiff_MBD1.gz -out Genes/2a_heatmap_H2AKubdiff_MBD1_k3.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --outFileSortedRegions Genes/2a_regions_H2AKubdiff_MBD1_k3.bed --kmeans 3

# n1=$(awk '$13=="cluster_1"' Genes/2a_regions_H2AKubdiff_MBD1_k3.bed | wc -l | awk '{print $1}')
# n2=$(awk '$13=="cluster_2"' Genes/2a_regions_H2AKubdiff_MBD1_k3.bed | wc -l | awk '{print $1}')
# n3=$(awk '$13=="cluster_3"' Genes/2a_regions_H2AKubdiff_MBD1_k3.bed | wc -l | awk '{print $1}')
# label=$(awk -v a=$n1 -v b=$n2 -v c=$n3 'BEGIN {print "MBD1_bound_no_change("a") MBD1_bound_mid("b") MBD1_bound_down("c")"}')
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing matrix for $mark\n"
	# computeMatrix scale-regions -R Genes/2a_regions_H2AKubdiff_MBD1_k3.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_${mark}_diffH2AKubMBD1.gz
	# printf "plotting heatmap for $mark\n"
	# plotHeatmap -m Genes/2a_matrix_${mark}_diffH2AKubMBD1.gz -out Genes/2a_heatmap_${mark}_diffH2AKubMBD1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3
	# plotHeatmap -m Genes/2a_matrix_${mark}_diffH2AKubMBD1.gz -out Genes/2a_heatmap_${mark}_diffH2AKubMBD1group.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --perGroup
# done

# printf "computing matrix for ChIP\n"
# computeMatrix scale-regions -R Genes/2a_regions_H2AKubdiff_MBD1_k3.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_ChIP_diffH2AKubMBD1.gz
# printf "plotting heatmap for ChIP\n"
# plotHeatmap -m Genes/2a_matrix_ChIP_diffH2AKubMBD1.gz -out Genes/2a_heatmap_ChIP_diffH2AKubMBD1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT

# for context in CG CHG CHH
# do
	# case "$context" in
		# CG) 	ylab="mCG";;
		# CHG)	ylab="mCHG";;
		# CHH)	ylab="mCHH";;
	# esac
	# printf "computing matrix for ${ylab}\n"
	# computeMatrix scale-regions -R Genes/2a_regions_H2AKubdiff_MBD1_k3.bed -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 200 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_${ylab}_diffH2AKubMBD1.gz
	# printf "plotting heatmap for ${ylab}\n"
	# plotHeatmap -m Genes/2a_matrix_${ylab}_diffH2AKubMBD1.gz -out Genes/2a_heatmap_${ylab}_diffH2AKubMBD1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3
# done
	
# printf "computing matrix for RNA\n"
# computeMatrix scale-regions -R Genes/2a_regions_H2AKubdiff_MBD1_k3.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/2a_matrix_RNA_diffH2AKubMBD1.gz
# printf "plotting heatmap for RNA\n"
# plotHeatmap -m Genes/2a_matrix_RNA_diffH2AKubMBD1.gz -out Genes/2a_heatmap_RNA_diffH2AKubMBD1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --xAxisLabel "Genes" --regionsLabel $label --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3
# printf "plotting profile for RNA\n"
### NOTE(review): both profiles below are drawn with --averageType 'median' while the y-axis label says "(mean)";
### if "(mean)" is meant to describe the plotted summary statistic, one of the two should be changed -- confirm.
# plotProfile -m Genes/2a_matrix_RNA_diffH2AKubMBD1.gz -out Genes/2a_profile_RNA_diffH2AKubMBD1group.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (mean)" --perGroup
# plotProfile -m Genes/2a_matrix_RNA_diffH2AKubMBD1.gz -out Genes/2a_profile_RNA_diffH2AKubMBD1.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (mean)"

# rm Genes/MBD1_downH2AKub_DEGstats.txt
# for sample in mbd1_2_4 mbd1_2_5_6 mbd2_5_6 suvh1_3
# do
	# printf "$sample\n"
	# rm Genes/MBD1_downH2AKub_${sample}.bed
	
	# sed 's/"//g' rna/DEG_RS5_x2.txt | grep $sample | awk -v OFS="\t" '{print $5,$6}' > rna/DEG_${sample}.txt
	
	# while read AGI dir
	# do
		# awk -v OFS="\t" -v g=$AGI -v d=$dir -v s=$sample '$4==g {if ($13=="cluster_1") c="No_change"; else if ($13=="cluster_2") c="Mid"; else if ($13=="cluster_3") c="Low"; print c,$4,d}' Genes/2a_regions_H2AKubdiff_MBD1_k3.bed >> Genes/MBD1_downH2AKub_${sample}.bed
	# done < rna/DEG_${sample}.txt

	# awk -v OFS="\t" '{print $1,$3}' Genes/MBD1_downH2AKub_${sample}.bed | sort | uniq -c | awk -v OFS="\t" -v s=$sample '{print s,$2,$3,$1}' >> Genes/MBD1_downH2AKub_DEGstats.txt
# done

# awk -v OFS="\t" 'BEGIN {print "Cluster","AGI"} NR>1 {if ($13=="cluster_1") c="No_change"; else if ($13=="cluster_2") c="Mid"; else if ($13=="cluster_3") c="Low"; print c,$4}' Genes/2a_regions_H2AKubdiff_MBD1_k3.bed > Genes/H2AKubdiff_clusters_AGI.txt


##############################################################

### For histone variants from open data

# nbound=$(wc -l Genes/MBD1_bound_unmethylated.bed | awk '{print $1}')
# nunbound=$(wc -l Genes/MBD1_unbound_unmethylated.bed | awk '{print $1}')
# nboundGbM=$(wc -l Genes/bound_GbM.bed | awk '{print $1}')
# nunboundGbM=$(wc -l Genes/unbound_GbM.bed | awk '{print $1}')

# label=$(awk -v a=$nboundGbM -v b=$nunboundGbM -v c=$nbound -v d=$nunbound 'BEGIN {print "Bound_GbM_genes("a") Unbound_GbM_genes("b") Bound_by_MBD1("c") Unbound("d")"}')
# region=$(printf "Genes/bound_GbM.bed Genes/unbound_GbM.bed Genes/MBD1_bound_unmethylated.bed Genes/MBD1_unbound_unmethylated.bed")

# samplelabel=$(ls -f histones/Open_data/deeptools/H*.bw | sed "s;histones/Open_data/deeptools/;;" | sed "s;.bw;;" | awk -v ORS=" " '{print $0}' )
# for mark in ${samplelabel[@]}
# do
	# printf "computing $mark matrix for genes in $region\n"
	# computeMatrix scale-regions -R $region -S histones/Open_data/deeptools/${mark}.bw -bs 20 -b 1000 -a 1000 -m 2000 -p 10 -o Genes/${mark}_matrix.gz
	# printf "plotting $mark profiles for genes in $region\n"
	# plotProfile -m Genes/${mark}_matrix.gz -out Genes/${mark}_profile_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $label --startLabel "TSS" --endLabel "TES" --samplesLabel $mark --yAxisLabel "$mark (log2FC vs Input)" --numPlotsPerRow 4 --perGroup
# done


#######################################################################################################################################################################
#### To rerun the peak clusters to avoid overlapping regions (going from the clusters bound by the most candidates to those bound by the fewest)

# rm manuscript/deeptools/temp*
# ### MBD1 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_cluster2.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# ### MBD2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_common_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_4" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD2_k4.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD4 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD4_k2.txt >> manuscript/deeptools/temp_common_peaks.bed
# ### MBD5 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD5_k3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# ### MBD6 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_k3.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD6_cluster3.txt >> manuscript/deeptools/temp_MBD5_6_peaks.bed
# ### SUVH1 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH1_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### SUVH3 peaks
# awk -v OFS="\t" '$0~/^[1-9]/ {print $1,$2,$3}' manuscript/deeptools/regions_peaks_SUVH3_k2.txt >> manuscript/deeptools/temp_MBD5_6_SUVH1_3_peaks.bed
# ### MBD1_2 peaks
# awk -v OFS="\t" '$13=="cluster_1" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed
# awk -v OFS="\t" '$13=="cluster_2" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD2_5_6_peaks.bed
# awk -v OFS="\t" '$13=="cluster_3" {print $1,$2,$3}' manuscript/deeptools/regions_peaks_MBD1_2_cluster.txt >> manuscript/deeptools/temp_MBD1_peaks.bed

# for file in manuscript/deeptools/temp_*.bed
# do
	# namext=${file##*/temp_}
	# name=${namext%_peaks.bed}
	# sort -k1,1n -k2,2n $file > manuscript/deeptools/temp2_${namext}
	# bedtools merge -d 100 -i manuscript/deeptools/temp2_${namext} > manuscript/deeptools/temp3_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp3_common_peaks.bed > manuscript/deeptools/final_common_peaks.bed
# rm manuscript/deeptools/temp3_common_peaks.bed

# for file in manuscript/deeptools/temp3_*.bed
# do
	# namext=${file##*/temp3_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_common_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp4_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp4_MBD5_6_SUVH1_3_peaks.bed > manuscript/deeptools/final_MBD5_6_SUVH1_3_peaks.bed
# rm manuscript/deeptools/temp4_MBD5_6_SUVH1_3_peaks.bed

# for file in manuscript/deeptools/temp4_*.bed
# do
	# namext=${file##*/temp4_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD5_6_SUVH1_3_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp5_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp5_MBD2_5_6_peaks.bed > manuscript/deeptools/final_MBD2_5_6_peaks.bed
# rm manuscript/deeptools/temp5_MBD2_5_6_peaks.bed

# for file in manuscript/deeptools/temp5_*.bed
# do
	# namext=${file##*/temp5_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD2_5_6_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp6_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp6_MBD5_6_peaks.bed > manuscript/deeptools/final_MBD5_6_peaks.bed
# rm manuscript/deeptools/temp6_MBD5_6_peaks.bed

# for file in manuscript/deeptools/temp6_*.bed
# do
	# namext=${file##*/temp6_}
	# name=${namext%_peaks.bed}
	# bedtools intersect -v -a $file -b manuscript/deeptools/final_MBD5_6_peaks.bed | awk -v OFS="\t" '$3-$2>100' > manuscript/deeptools/temp7_${namext}
# done
# awk -v OFS="\t" '$3-$2>100' manuscript/deeptools/temp7_MBD1_peaks.bed > manuscript/deeptools/final_MBD1_peaks.bed
# rm manuscript/deeptools/temp*

# #####################################################################################################################################################################################
# # # ### To check that final regions are not overlapping

# rm manuscript/overlapping_cluster_regions.txt
# for reg1 in common MBD1 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
# do
	# for reg2 in common MBD1 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
	# do
		# bedtools intersect -a manuscript/deeptools/final_${reg1}_peaks.bed -b manuscript/deeptools/final_${reg2}_peaks.bed | wc -l | awk -v OFS="\t" -v r1=$reg1 -v r2=$reg2 '{print r1,r2,$1}' >> manuscript/overlapping_cluster_regions.txt
	# done
# done
# cat manuscript/overlapping_cluster_regions.txt

# #####################################################################################################################################################################################

### To split common cluster based on MBD4

# ### pass1
# printf "computing matrix for common peaks pass1\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_common_peaks.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks.gz
# printf "plotting heatmap for common peaks pass1\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks.gz -out manuscript/plots/heatmap_common_pass1_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass1_k2.bed --kmeans 2

# ### pass2
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass1_k2.bed > manuscript/deeptools/regions_common_pass2.bed
# printf "computing matrix for common peaks pass2\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass2.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass2.gz
# printf "plotting heatmap for common peaks pass2\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass2.gz -out manuscript/plots/heatmap_common_pass2_k4.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass2_k4.bed --kmeans 4

# ### pass3
# awk -v OFS="\t" '$13=="cluster_4"' manuscript/deeptools/regions_common_pass2_k4.bed > manuscript/deeptools/regions_common_pass3.bed
# printf "computing matrix for common peaks pass3\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass3.bed -S manuscript/MBD4.bw manuscript/MBD1.bw manuscript/MBD5.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass3.gz
# printf "plotting heatmap for common peaks pass3\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass3.gz -out manuscript/plots/heatmap_common_pass3_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --zMin -1.5 --zMax 1.5 --samplesLabel MBD4 MBD1 MBD5 --outFileSortedRegions manuscript/deeptools/regions_common_pass3_k2.bed --kmeans 2

# ### pass4
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass3_k2.bed | sort -k1,1n -k2,2n > manuscript/deeptools/regions_common_pass4.bed
# printf "computing matrix for common peaks pass4\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_common_pass4.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_peaks_pass4.gz
# printf "plotting heatmap for common peaks pass4\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_peaks_pass4.gz -out manuscript/plots/heatmap_common_pass4_k2.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 --zMin -1.5 --zMax 1.5 --kmeans 2 --outFileSortedRegions manuscript/deeptools/regions_common_pass4_k2.bed

# ### final check
# awk -v OFS="\t" '$13=="cluster_2"' manuscript/deeptools/regions_common_pass4_k2.bed | sort -k1,1n -k2,2n > manuscript/deeptools/final2_MBD4_peaks.bed
# bedtools intersect -v -f 1 -r -a manuscript/deeptools/final_common_peaks.bed -b manuscript/deeptools/final2_MBD4_peaks.bed > manuscript/deeptools/final2_common_peaks.bed

# printf "computing matrix for peaks groups in common and MBD4\n"
# computeMatrix scale-regions -R manuscript/deeptools/final2_common_peaks.bed manuscript/deeptools/final2_MBD4_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_common_MBD4_peaks.gz
# printf "plotting heatmap for peaks common and MBD4\n"
# plotHeatmap -m manuscript/deeptools/grouped_common_MBD4_peaks.gz -out manuscript/plots/heatmap_common_MBD4.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel "Common" "MBD4" --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar"

# mv manuscript/deeptools/final2_common_peaks.bed manuscript/deeptools/final_common_peaks.bed
# mv manuscript/deeptools/final2_MBD4_peaks.bed manuscript/deeptools/final_MBD4_peaks.bed

# ########################## 
##### plotting final heatmap of ChIP+ over the 6 clusters

# rm manuscript/deeptools/regions_labels.txt
# for file in manuscript/deeptools/final_*.bed
# do
	# namext=${file##*/final_}
	# name=${namext%_peaks.bed}
	# wc -l $file | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/regions_labels.txt
# done
# clustername=$(cat manuscript/deeptools/regions_labels.txt)
	
# printf "computing matrix part1 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf1.gz
# printf "plotting heatmap part1 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf1.gz -out manuscript/plots/heatmap_grouped_vf1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar" --outFileSortedRegions manuscript/deeptools/regions_grouped_peaks.bed

##### plotting final heatmap of mC over the 6 clusters

# printf "computing matrix part2 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_grouped_peaks.bed -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf2.gz
# printf "plotting heatmap part2 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf2.gz -out manuscript/plots/heatmap_grouped_vf2.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' 'Oranges' 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel mCG mCHG mCHH --regionsLabel ${clustername} --zMin 0 --zMax 100 80 30 --whatToShow "heatmap and colorbar"

# #### randomize the common peak file to see if some annotations are overrepresented
# #### NOTE(review): the output is named "nal_random_peaks.bed" rather than "final_random_peaks.bed", presumably on purpose:
# #### that name matches the "*al_${reg}_peaks.bed" / "*al_*_peaks.bed" globs used later in this script but not the "final_*" globs.

# awk -v OFS="\t" '$1~/^[1-9]/ {print $1,$3}' annotations/chromsize.bed > annotations/ath.genome
# bedtools shuffle -i manuscript/deeptools/final_common_peaks.bed -g annotations/ath.genome | sort -k1,1n -k2,2n > manuscript/deeptools/nal_random_peaks.bed

# #### getting distance to centromeres
# #### NOTE(review): the chromosome 4 interval written below (3133664-3133674) is only 10 bp wide, whereas the other
# #### four intervals span roughly 21 kb to 529 kb -- confirm the chr4 coordinates before re-enabling.
# rm manuscript/deeptools/distance_to_centromeres*
# printf "1\t14511722\t14803970\n2\t3611839\t3633423\n3\t13589757\t13867121\n4\t3133664\t3133674\n5\t11194538\t11723210\n" > annotations/centromere_positions.bed
# for chr in 1 2 3 4 5
# do
	# cs=$(awk -v c=$chr '$1==c {print $2}' annotations/centromere_positions.bed)
	# ce=$(awk -v c=$chr '$1==c {print $3}' annotations/centromere_positions.bed)
	# max=$(awk -v c=$chr '$1==c {print $2}' methyl/chromosome_sizes.txt)
	# cat manuscript/deeptools/regions_grouped_peaks.bed manuscript/deeptools/nal_random_peaks.bed | awk -v OFS="\t" -v c=$chr -v s=$cs -v e=$ce -v m=$max '$1==c {if ($3<s) d=((s-$3)/(s-1))*100; else if ($2>e) d=(($2-e)/(m-e))*100; else d=0; print $1,$2,$3,d}'  > manuscript/deeptools/distance_to_centromeres_${chr}.bedGraph
# done
# cat manuscript/deeptools/distance_to_centromeres_*.bedGraph | sort -k1,1n -k2,2n > manuscript/deeptools/distance_to_centromeres.bedGraph

# printf "Group\tPeak_ID\tDistance\tChr\tStart\tEnd\n" > manuscript/deeptools/distance_to_centromeres_grouped.txt
# for reg in random common MBD1 MBD4 MBD2_5_6 MBD5_6 MBD5_6_SUVH1_3
# do
	# bedtools intersect -a manuscript/deeptools/distance_to_centromeres.bedGraph -b manuscript/deeptools/*al_${reg}_peaks.bed -wa -f 1 -r | awk -v OFS="\t" -v r=$reg '{print r,r"_peaks_"NR,$4,$1,$2,$3}' >> manuscript/deeptools/distance_to_centromeres_grouped.txt
# done

##### plotting final heatmap of Distance to centromere over the 6 clusters

# bedGraphToBigWig manuscript/deeptools/distance_to_centromeres.bedGraph methyl/chromosome_sizes.txt manuscript/deeptools/distance_to_centromeres.bw
# printf "computing matrix part3 for peaks groups in $clustername\n"
# computeMatrix scale-regions -R manuscript/deeptools/regions_grouped_peaks.bed -S manuscript/deeptools/distance_to_centromeres.bw -bs 10 -b 0 -a 0 -m 300 -p 10 -o manuscript/deeptools/grouped_peaks_vf3.gz
# printf "plotting heatmap part3 for peaks groups\n"
# plotHeatmap -m manuscript/deeptools/grouped_peaks_vf3.gz -out manuscript/plots/heatmap_grouped_vf3.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel "Distance to centromere" --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"

# #####################################################################
# ### To get annotation files 

###  To annotate grouped peaks (and get the corresponding closest AGI)

# printf "Group\tAnnotation\tNumber\n" > manuscript/deeptools/annotated_grouped_peaks.txt
# printf "Group\tPeak_ID\tAnnotation\tAGI\n" > manuscript/deeptools/annotated_grouped_peaks_AGI.txt
# for file in manuscript/deeptools/*al_*_peaks.bed
# do
	# namext=${file##*/*al_}
	# name=${namext%.bed}
	# printf "$name\n\n"
	# awk -v OFS="\t" -v n=$name '{print $1,$2,$3,n"_"NR,".","."}' $file > manuscript/deeptools/temp0_${name}.bed
	# annotatePeaks.pl manuscript/deeptools/temp0_${name}.bed tair10 -gff annotations/Araport11_GFF3_genes_transposons.201606.gff -annStats manuscript/deeptools/stats_${name}_peaks.txt > manuscript/deeptools/tempA_${name}.txt
	# awk -v OFS="\t" '(NR>1) && ($9=="Intergenic" || $9=="Chr") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="promoter-TSS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1b_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="TTS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1c_${name}.bed
	# awk -v OFS="\t" '(NR>1) && ($9=="exon" || $9=="3'"'"'" || $9=="5'"'"'") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1d_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="intron" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1e_${name}.bed
	# bedtools intersect -a manuscript/deeptools/temp1_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1b_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2b_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1c_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2c_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1d_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2d_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1e_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2e_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' manuscript/deeptools/temp2_${name}.txt > manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' manuscript/deeptools/temp2b_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' manuscript/deeptools/temp2c_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' manuscript/deeptools/temp2d_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' manuscript/deeptools/temp2e_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' manuscript/deeptools/temp3_${name}.txt > manuscript/deeptools/temp4_${name}.txt
	# sort -k1,1 -k2,2n manuscript/deeptools/temp4_${name}.txt | uniq | awk -v OFS="\t" '{print $4,$5}' | sort -k1,1 > manuscript/deeptools/annotated_peaks_${name}.txt
	# awk -v OFS="\t" '{print $2}' manuscript/deeptools/annotated_peaks_${name}.txt | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,$2,$1}' >> manuscript/deeptools/annotated_grouped_peaks.txt
	# while read ID anno
	# do
		# awk -v OFS="\t" -v i=$ID '$1==i {print $10}' manuscript/deeptools/tempA_${name}.txt | sed 's/(//g' | sed 's/,//g' | awk -F"[.]" '{print $1}' | awk -v OFS="\t" -v i=$ID -v a=$anno -v n=$name '{print n,i,a,$1}' >> manuscript/deeptools/annotated_grouped_peaks_AGI.txt
	# done < manuscript/deeptools/annotated_peaks_${name}.txt
# done
# rm manuscript/deeptools/temp*

# ######## To plot different profiles over the 6 clusters

# clustername=$(cat manuscript/deeptools/regions_labels.txt)

# printf "computing regions matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_chip.gz
# printf "plotting regions profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_chip.gz -out manuscript/plots/profile_grouped_peaks_chip.pdf --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --plotType 'lines' --averageType 'mean' --startLabel "Peak" --endLabel "" --yAxisLabel "ChIP" --perGroup --regionsLabel $clustername

# for context in CG CHG CHH
# do
	# case $context in
		# CG) ymax=100
			# ymin=-1
			# ylab="mCG(%)";;
		# CHG) ymax=50
			# ymin=-0.5
			# ylab="mCHG(%)";;
		# CHH) ymax=20
			# ymin=-0.2
			# ylab="mCHH(%)";;
	# esac
	# printf "computing $context methylation matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${context}.gz
	# printf "plotting $context methylation profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${context}.gz -out manuscript/plots/profile_grouped_peaks_${context}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --perGroup --yMin 0 --yMax $ymax
# done
	
# printf "computing rna matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_rna.gz
# printf "plotting rna profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_rna.gz -out manuscript/plots/profile_grouped_peaks_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA (median)" --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing $mark matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_${mark}.gz
	# printf "plotting $mark profile for peaks groups\n"
	# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/plots/profile_grouped_peaks_${mark}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --outFileNameData manuscript/plots/profile_grouped_peaks_${mark}_data.txt
	# printf "plotting $mark heatmap for peaks groups\n"
	# plotHeatmap -m manuscript/deeptools/matrix_grouped_peaks_${mark}.gz -out manuscript/plots/heatmap_grouped_peaks_${mark}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"
# done

# printf "computing CG density matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/deeptools/final_*.bed -S annotations/CG_density.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz --missingDataAsZero
# printf "plotting CG density profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/matrix_grouped_peaks_cgdensity.gz -out manuscript/plots/profile_grouped_peaks_cgdensity.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel "CG density" --yAxisLabel "CG density" --perGroup


########################################################################################################
#### To check heatmaps and profiles of each peak group split by annotation category (genes vs. TEs)

# rm manuscript/deeptools/after_annot_*.bed
# while read group peakid annot agi
# do
	# awk -v OFS="\t" -v i=$peakid '$2==i {print $4,$5,$6}' manuscript/deeptools/distance_to_centromeres_grouped.txt >> manuscript/deeptools/after_annot_${group}_${annot}.bed
# done < manuscript/deeptools/annotated_grouped_peaks_AGI.txt

# for group in common MBD1 MBD2_5_6 MBD4 MBD5_6 MBD5_6_SUVH1_3
# do
	# rm manuscript/deeptools/after_annot_${group}_labels.txt
	# rm manuscript/deeptools/after_annot_${group}_regions.txt
	# for name in Promoter Exon Intron TTS TE_prom TE_gene TE_TTS TE Intergenic
	# do
		# if [ -f ./manuscript/deeptools/after_annot_${group}_peaks_${name}.bed ]
		# then
			# wc -l manuscript/deeptools/after_annot_${group}_peaks_${name}.bed | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/after_annot_${group}_labels.txt
			# printf "manuscript/deeptools/after_annot_${group}_peaks_${name}.bed " >> manuscript/deeptools/after_annot_${group}_regions.txt
		# fi
	# done
	# clustername=$(cat manuscript/deeptools/after_annot_${group}_labels.txt)
	# regionsname=$(cat manuscript/deeptools/after_annot_${group}_regions.txt)

	# printf "computing matrix for $group peaks\n$clustername\n$regionsname\n"
	# computeMatrix scale-regions -R ${regionsname} -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/after_annot_${group}_peaks.gz
	# printf "plotting heatmap for $group peaks\n"
	# plotHeatmap -m manuscript/deeptools/after_annot_${group}_peaks.gz -out manuscript/plots/heatmap_after_annot_${group}_peaks.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1 --whatToShow "heatmap and colorbar"
	# printf "computing matrix for $group peaks mC\n$clustername\n$regionsname\n"
	# computeMatrix scale-regions -R ${regionsname} -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/after_annot_${group}_peaks_mC.gz
	# printf "plotting heatmap for $group peaks mC\n"
	# plotHeatmap -m manuscript/deeptools/after_annot_${group}_peaks_mC.gz -out manuscript/plots/heatmap_after_annot_${group}_peaks_mC.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel mCG mCHG mCHH --regionsLabel ${clustername} --zMin 0 --zMax 100 80 30 --whatToShow "heatmap and colorbar"
	
	# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
	# do
		# printf "computing matrix for $group peaks $mark\n$clustername\n$regionsname\n\n"
		# computeMatrix scale-regions -R ${regionsname} -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/after_annot_${group}_peaks_${mark}.gz
		# printf "plotting profile for $group peaks $mark\n"
		# plotProfile -m manuscript/deeptools/after_annot_${group}_peaks_${mark}.gz -out manuscript/plots/profile_after_annot_${group}_peaks_${mark}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel ${clustername} --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup
		# printf "plotting heatmap for $group peaks $mark\n"
		# plotHeatmap -m manuscript/deeptools/after_annot_${group}_peaks_${mark}.gz -out manuscript/plots/heatmap_after_annot_${group}_peaks_${mark}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel ${clustername} --whatToShow "heatmap and colorbar"
	# done
# done

###############################################################################################################
## To cluster peaks based on H2AKub signal (and differences between mutants) and then plot all other marks

# for file in manuscript/deeptools/final_*.bed
# do
	# namext=${file##*/final_}
	# name=${namext%_peaks.bed}
	# for mark in H2AKub
	# do
		# printf "computing $mark matrix for $name group\n"
		# computeMatrix scale-regions -R $file -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_${name}_peaks_${mark}.gz
		# printf "plotting $mark heatmap for $name group in 6 clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${name}_peaks_${mark}.gz -out manuscript/plots/heatmap_${name}_peaks_${mark}_k6.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --kmeans 6 --outFileSortedRegions manuscript/plots/regions_${name}_peaks_${mark}_k6.bed
	# done
	# for clust in cluster_1 cluster_2 cluster_3 cluster_4 cluster_5 cluster_6
	# do
		# awk -v ORS=" " -v c=$clust '$13==c {n=n+1} END {print c"("n") "}' manuscript/plots/regions_${name}_peaks_H2AKub_k6.bed >> manuscript/deeptools/clustered_H2AKub_${name}_labels.txt
	# done
	# clustername=$(cat manuscript/deeptools/clustered_H2AKub_${name}_labels.txt)

	# printf "computing matrix for $name peaks\n$clustername\n"
	# computeMatrix scale-regions -R manuscript/plots/regions_${name}_peaks_H2AKub_k6.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw rna/WT_a.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/clustered_H2AKub_${name}_peaks.gz
	# printf "plotting heatmap for $name peaks\n"
	# plotHeatmap -m manuscript/deeptools/clustered_H2AKub_${name}_peaks.gz -out manuscript/plots/clustered_H2AKub_${name}_peaks_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'viridis' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 RNA Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1 1
	# printf "computing matrix for $name peaks mC\n$clustername\n"
	# computeMatrix scale-regions -R manuscript/plots/regions_${name}_peaks_H2AKub_k6.bed -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 100 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/clustered_H2AKub_${name}_peaks_mC.gz
	# printf "plotting heatmap for $name peaks mC\n"
	# plotHeatmap -m manuscript/deeptools/clustered_H2AKub_${name}_peaks_mC.gz -out manuscript/plots/clustered_H2AKub_${name}_peaks_heatmap_mC.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'Oranges' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel mCG mCHG mCHH --regionsLabel ${clustername} --zMin 0 --zMax 100 80 30
	# for mark in H3 H3K9me2 H3K4me1 H3K27me3
	# do
		# printf "computing $mark matrix for $name peaks\n"
		# computeMatrix scale-regions -R manuscript/plots/regions_${name}_peaks_H2AKub_k6.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/clustered_H2AKub_${name}_peaks_${mark}.gz
		# printf "plotting $mark heatmap for $name peaks\n"
		# plotHeatmap -m manuscript/deeptools/clustered_H2AKub_${name}_peaks_${mark}.gz -out manuscript/plots/clustered_H2AKub_${name}_peaks_heatmap_${mark}.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3  --regionsLabel ${clustername}
	# done
# done

##############################################################################
### To check patterns at MBD1 peaks

# bedtools intersect -wb -a manuscript/peaks_MBD1.bed -b annotations/all_genes.bed | awk -v OFS="\t" '{print $5,$6,$7,$8,$9,$10}' | sort -k1,1n -k2,2n | uniq > manuscript/genes_MBD1.bed

# printf "computing H2AKub matrix for MBD1 peaks\n"
# computeMatrix scale-regions -R manuscript/genes_MBD1.bed -S histones/MBD1_2_5_6_H2AKub_minusWT.bw -bs 20 -m 1000 -p 10 -o manuscript/deeptools/matrix_MBD1_genes_H2AKub.gz
# for clust in 2 3 4 5 6
# do
	# printf "plotting H2AKub heatmap for MBD1 genes in $clust clusters\n"
	# plotHeatmap -m manuscript/deeptools/matrix_MBD1_genes_H2AKub.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_heatmap.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Gene" --samplesLabel "MBD1/2/5/6 - WT" --kmeans ${clust} --outFileSortedRegions manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed
	# rm manuscript/deeptools/regions_MBD1_genes_H2AKub_labels_k${clust}.txt
	# for (( i=1; i<=$clust; i++ )) 
	# do 
		# cluster=$(printf "cluster_${i}")
		# awk -v ORS=" " -v c=$cluster '$13==c {n=n+1} END {print c"("n") "}' manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed >> manuscript/deeptools/regions_MBD1_genes_H2AKub_labels_k${clust}.txt
	# done
	# clustername=$(cat manuscript/deeptools/regions_MBD1_genes_H2AKub_labels_k${clust}.txt)
	
	# printf "computing chip region matrix for MBD1 genes in $clust clusters\n$clustername\n"
	# computeMatrix scale-regions -R manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_chip.gz
	# printf "plotting chip heatmap for MBD1 genes in $clust clusters\n"
	# plotHeatmap -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_chip.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_chip_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1
	
	# printf "computing chip TSS matrix for MBD1 genes in $clust clusters\n$clustername\n"
	# computeMatrix reference-point --referencePoint "TSS" -R manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 8000 -p 10 -o manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_chip2.gz
	# printf "plotting chip TSS heatmap for MBD1 genes in $clust clusters\n"
	# plotHeatmap -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_chip2.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_chip_heatmap2.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --refPointLabel "TSS" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1
	
	# printf "computing rna matrix for MBD1 genes in $clust clusters\n$clustername\n"
	# computeMatrix scale-regions -R manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_rna.gz
	# printf "plotting rna heatmap for MBD1 genes in $clust clusters\n"
	# plotHeatmap -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_rna.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_rna_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel ${clustername}
	# printf "plotting rna profile for MBD1 genes in $clust clusters\n"
	# plotProfile -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_rna.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_rna_profile.pdf --plotType 'lines' --averageType 'median' --regionsLabel ${clustername} --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (median)" --numPlotsPerRow 4 --perGroup
	
	# for mark in H3 H2AKub H3K9me2 H3K4me1 H3K27me3
	# do
		# printf "computing $mark matrix for MBD1 genes in $clust clusters\n$clustername\n"
		# computeMatrix scale-regions -R manuscript/plots/regions_MBD1_genes_H2AKub_k${clust}.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_${mark}.gz
		# printf "plotting $mark heatmap for MBD1 genes in $clust clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_${mark}.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_${mark}_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3  --regionsLabel ${clustername}
		# printf "plotting $mark profile for MBD1 genes in $clust clusters\n"
		# plotProfile -m manuscript/deeptools/matrix_MBD1_genes_H2AKub_k${clust}_${mark}.gz -out manuscript/plots/MBD1_genes_H2AKub_k${clust}_${mark}_profile.pdf --plotType 'lines' --averageType 'mean' --regionsLabel ${clustername} --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark (log2FC vs Input)" --numPlotsPerRow 4 --perGroup
	# done
# done

### To check DEGs in these clusters, and extract AGI for RNA plots in R

# rm manuscript/plots/DEGs_in_MBD1_clusters.txt
# i=0
# for sample in mbd1_2_4 mbd1_2_5_6
# do
	# while read AGI dir
	# do
		# grep $AGI manuscript/plots/regions_MBD1_genes_H2AKub_k6.bed > manuscript/plots/temp_${i}.txt
		# n=$(wc -l manuscript/plots/temp_${i}.txt | awk '{print $1}')
		# if [[ $n == 1 ]]
		# then
			# # printf "$n should be 1\n"
			# awk -v OFS="\t" -v a=$AGI -v d=$dir -v s=$sample '{print s,a,$13,d,$1,$2,$3}' manuscript/plots/temp_${i}.txt >> manuscript/plots/DEGs_in_MBD1_clusters.txt
		# else 
			# # printf "$n should be 0\n"
			# awk -v OFS="\t" -v a=$AGI -v d=$dir -v s=$sample 'BEGIN {print s,a,"none",d}' >> manuscript/plots/DEGs_in_MBD1_clusters.txt
		# fi
		# i=$((i+1))
	# done < rna/DEG_${sample}.txt
# done
# rm manuscript/plots/temp*
# awk '{print $1,$3,$4}' manuscript/plots/DEGs_in_MBD1_clusters.txt | sort -k1,1 -k2,2 -k3,3 | uniq -c

# printf "Cluster\tAGI\n" > manuscript/plots/AGI_in_MBD1_clusters.txt
# for clust in cluster_1 cluster_2 cluster_3 cluster_4 cluster_5 cluster_6
# do
	# awk -v OFS="\t" -v c=$clust '$13==c {print c,$4}' manuscript/plots/regions_MBD1_genes_H2AKub_k6.bed >> manuscript/plots/AGI_in_MBD1_clusters.txt
# done
	
	
##############################################################################
### To check patterns at other candidate peaks

# for sample in MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# bedtools intersect -wb -a manuscript/peaks_${sample}.bed -b annotations/all_genes.bed | awk -v OFS="\t" '{print $5,$6,$7,$8,$9,$10}' | sort -k1,1n -k2,2n | uniq > manuscript/genes_${sample}.bed
	
	# case $sample in
		# MBD2) file=MBD1_2_5_6_H2AKub_minusWT.bw;;
		# MBD4) file=MBD1_2_4_H2AKub_minusWT.bw;;
		# MBD5) file=MBD1_2_5_6_H2AKub_minusWT.bw;;
		# MBD6) file=MBD1_2_5_6_H2AKub_minusWT.bw;;
		# SUVH1) file=SUVH1_3_H2AKub_minusWT.bw;;
		# SUVH3) file=SUVH1_3_H2AKub_minusWT.bw;;
	# esac

	# printf "computing H2AKub matrix for ${sample} peaks\n"
	# computeMatrix scale-regions -R manuscript/genes_${sample}.bed -S histones/$file -bs 20 -m 1000 -p 10 -o manuscript/deeptools/matrix_${sample}_genes_H2AKub.gz
	# for clust in 2 6
	# do
		# printf "plotting H2AKub heatmap for ${sample} genes in $clust clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${sample}_genes_H2AKub.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_heatmap.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --startLabel "" --endLabel "" --xAxisLabel "Gene" --samplesLabel "MBD1/2/5/6 - WT" --kmeans ${clust} --outFileSortedRegions manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed
		# rm manuscript/deeptools/regions_${sample}_genes_H2AKub_labels_k${clust}.txt
		# for (( i=1; i<=$clust; i++ )) 
		# do 
			# cluster=$(printf "cluster_${i}")
			# awk -v ORS=" " -v c=$cluster '$13==c {n=n+1} END {print c"("n") "}' manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed >> manuscript/deeptools/regions_${sample}_genes_H2AKub_labels_k${clust}.txt
		# done
		# clustername=$(cat manuscript/deeptools/regions_${sample}_genes_H2AKub_labels_k${clust}.txt)
	
		# printf "computing chip region matrix for ${sample} genes in $clust clusters\n$clustername\n"
		# computeMatrix scale-regions -R manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_chip.gz
		# printf "plotting chip heatmap for ${sample} genes in $clust clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_chip.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_chip_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1
	
		# printf "computing chip TSS matrix for ${sample} genes in $clust clusters\n$clustername\n"
		# computeMatrix reference-point --referencePoint "TSS" -R manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 8000 -p 10 -o manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_chip2.gz
		# printf "plotting chip TSS heatmap for ${sample} genes in $clust clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_chip2.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_chip_heatmap2.pdf --sortRegions descend --sortUsing region_length --missingDataColor 0.8 --colorMap 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'coolwarm' 'Blues' 'Reds' --interpolationMethod bilinear --refPointLabel "TSS" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1 1
	
		# printf "computing rna matrix for ${sample} genes in $clust clusters\n$clustername\n"
		# computeMatrix scale-regions -R manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_rna.gz
		# printf "plotting rna heatmap for ${sample} genes in $clust clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_rna.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_rna_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --interpolationMethod bilinear --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --regionsLabel ${clustername}
		# printf "plotting rna profile for ${sample} genes in $clust clusters\n"
		# plotProfile -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_rna.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_rna_profile.pdf --plotType 'lines' --averageType 'median' --regionsLabel ${clustername} --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "RNA sum replicates (median)" --numPlotsPerRow 4 --perGroup
	
		# for mark in H3 H2AKub H3K9me2 H3K4me1 H3K27me3
		# do
			# printf "computing $mark matrix for ${sample} genes in $clust clusters\n$clustername\n"
			# computeMatrix scale-regions -R manuscript/plots/regions_${sample}_genes_H2AKub_k${clust}.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 1000 -a 1000 -m 1000 -p 10 -o manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_${mark}.gz
			# printf "plotting $mark heatmap for ${sample} genes in $clust clusters\n"
			# plotHeatmap -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_${mark}.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_${mark}_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3  --regionsLabel ${clustername}
			# printf "plotting $mark profile for ${sample} genes in $clust clusters\n"
			# plotProfile -m manuscript/deeptools/matrix_${sample}_genes_H2AKub_k${clust}_${mark}.gz -out manuscript/plots/${sample}_genes_H2AKub_k${clust}_${mark}_profile.pdf --plotType 'lines' --averageType 'mean' --regionsLabel ${clustername} --startLabel "TSS" --endLabel "TES" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark (log2FC vs Input)" --numPlotsPerRow 4 --perGroup
		# done
	# done
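# ### To check DEGs in these clusters, and extract AGI for RNA plots in R
# ### NOTE(review): the inner `for sample in mbd1_2_4 ...` loop below reuses the outer loop variable `sample`,
# ### so regions_${sample}_genes_H2AKub_k6.bed, DEGs_in_${sample}_clusters.txt and AGI_in_${sample}_clusters.txt
# ### no longer refer to the ChIP sample (MBD2, MBD4, ...); rename the inner variable before re-enabling this section.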

	# rm manuscript/plots/DEGs_in_${sample}_clusters.txt
	# i=0
	# for sample in mbd1_2_4 mbd1_2_5_6 mbd2_5_6 suvh1_3
	# do
		# while read AGI dir
		# do
			# grep $AGI manuscript/plots/regions_${sample}_genes_H2AKub_k6.bed > manuscript/plots/temp_${i}.txt
			# n=$(wc -l manuscript/plots/temp_${i}.txt | awk '{print $1}')
			# if [[ $n == 1 ]]
			# then
				# # printf "$n should be 1\n"
				# awk -v OFS="\t" -v a=$AGI -v d=$dir -v s=$sample '{print s,a,$13,d,$1,$2,$3}' manuscript/plots/temp_${i}.txt >> manuscript/plots/DEGs_in_${sample}_clusters.txt
			# else 
				# # printf "$n should be 0\n"
				# awk -v OFS="\t" -v a=$AGI -v d=$dir -v s=$sample 'BEGIN {print s,a,"none",d}' >> manuscript/plots/DEGs_in_${sample}_clusters.txt
			# fi
			# i=$((i+1))
		# done < rna/DEG_${sample}.txt
	# done
	# rm manuscript/plots/temp*
	# awk '{print $1,$3,$4}' manuscript/plots/DEGs_in_${sample}_clusters.txt | sort -k1,1 -k2,2 -k3,3 | uniq -c

	# printf "Cluster\tAGI\n" > manuscript/plots/AGI_in_${sample}_clusters.txt
	# for clust in cluster_1 cluster_2 cluster_3 cluster_4 cluster_5 cluster_6
	# do
		# awk -v OFS="\t" -v c=$clust '$13==c {print c,$4}' manuscript/plots/regions_${sample}_genes_H2AKub_k6.bed >> manuscript/plots/AGI_in_${sample}_clusters.txt
	# done
# done

	
##########################################################################################

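### Check antisense RNA profiles
### Genes with "+" in column 6 are appended to annotations/genes_crick.bed and profiled with the "rev" RNA tracks;
### all other genes are appended to annotations/genes_watson.bed and profiled with the "for" RNA tracks.
### NOTE: the awk split appends (>>), so both bed files must be deleted before re-running it.
### NOTE(review): in the clustered plots, plotHeatmap --samplesLabel lists WT first whereas computeMatrix -S lists the WT tracks last - confirm the label order.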

# awk -v OFS="\t" '{if ($6=="+") printf $0"\n" >> "annotations/genes_crick.bed"; else printf $0"\n" >> "annotations/genes_watson.bed" }' annotations/all_genes.bed

# mutlabels=(mbd1/2/4_a mbd1/2/4_b mbd1/2/4_c mbd2/5/6_a mbd2/5/6_b mbd2/5/6_c mbd1/2/5/6_a mbd1/2/5/6_b mbd1/2/5/6_c SUVH1/3_a SUVH1/3_b SUVH1/3_c WT_a WT_b WT_c)
# colors=("#70B6EC" "#70B6EC" "#70B6EC" "#9E61CB" "#9E61CB" "#9E61CB" "#4F4FCC" "#4F4FCC" "#4F4FCC" "#E8824E" "#E8824E" "#E8824E" "#000000" "#000000" "#000000")

# for strand in crick watson
# do
	# case "$strand" in
		# crick) dir="rev";;
		# watson) dir="for";;
	# esac
	# printf "computing matrix for $dir RNA on $strand genes\n"
	# computeMatrix scale-regions -R annotations/genes_${strand}.bed -S rna/mbd1_2_4_${dir}_log2.bw rna/mbd1_2_5_6_${dir}_log2.bw rna/mbd2_5_6_${dir}_log2.bw rna/suvh1_3_${dir}_log2.bw -bs 5 -m 1000 -p 10 -o manuscript/deeptools/matrix_${strand}_genes.gz
	# printf "plotting heatmap for $dir RNA on $strand genes\n"
	# plotHeatmap -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_heatmap.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --regionsLabel "$strand Genes"
	# printf "plotting profiles for $dir RNA on $strand genes\n"
	# plotProfile -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_profile_k0_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel "$strand Genes" --startLabel "TSS" --endLabel "TES" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --yAxisLabel "median $dir RNA (log2FC vs WT)"
	# plotProfile -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_profile_k0_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel "$strand Genes" --startLabel "TSS" --endLabel "TES" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --yAxisLabel "mean $dir RNA (log2FC vs WT)"
	# for k in 2 3 4 5
	# for k in 4
	# do
		# rm manuscript/deeptools/temp_labels.txt
		# for (( i=1; i<=$k; i++ )) 
		# do
			# printf "${strand}_genes_cluster_${i} " >> manuscript/deeptools/temp_labels.txt
		# done
		# labels=$(cat manuscript/deeptools/temp_labels.txt)
		### TODO: pass --regionsLabel $labels to the plotHeatmap call below (it currently has no --regionsLabel)
		
		# printf "plotting heatmap for $dir RNA on $labels\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_heatmap_k${k}.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --kmeans $k --outFileSortedRegions manuscript/plots/regions_${strand}_genes_k${k}.bed

		# printf "plotting profiles for $dir RNA on $labels\n"
		# plotProfile -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_profile_k${k}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $labels --startLabel "TSS" --endLabel "TES" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --yAxisLabel "median $dir RNA (log2FC vs WT)" --kmeans $k --perGroup
		# plotProfile -m manuscript/deeptools/matrix_${strand}_genes.gz -out manuscript/plots/${strand}_genes_profile_k${k}_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $labels --startLabel "TSS" --endLabel "TES" --samplesLabel "MBD1/2/4 $dir" "MBD1/2/5/6 $dir" "MBD2/5/6 $dir" "SUVH1/3 $dir" --yAxisLabel "mean $dir RNA (log2FC vs WT)" --kmeans $k --perGroup
		
		# rm manuscript/plots/regions_${strand}_genes_k${k}_labels.txt
		# for (( i=1; i<=$k; i++ )) 
		# do 
			# cluster=$(printf "cluster_${i}")
			# awk -v ORS=" " -v c=$cluster '$13==c {n=n+1} END {print c"("n") "}' manuscript/plots/regions_${strand}_genes_k${k}.bed >> manuscript/plots/regions_${strand}_genes_k${k}_labels.txt
		# done
		# clustername=$(cat manuscript/plots/regions_${strand}_genes_k${k}_labels.txt)
		# printf "computing matrix for $dir RNA on $strand genes in $k clusters\n"
		# computeMatrix scale-regions -R manuscript/plots/regions_${strand}_genes_k${k}.bed -S rna/mbd1_2_4_a_${dir}.bw rna/mbd1_2_4_b_${dir}.bw rna/mbd1_2_4_c_${dir}.bw rna/mbd2_5_6_a_${dir}.bw rna/mbd2_5_6_b_${dir}.bw rna/mbd2_5_6_c_${dir}.bw rna/mbd1_2_5_6_a_${dir}.bw rna/mbd1_2_5_6_b_${dir}.bw rna/mbd1_2_5_6_c_${dir}.bw rna/suvh1_3_a_${dir}.bw rna/suvh1_3_b_${dir}.bw rna/suvh1_3_c_${dir}.bw rna/WT_a_${dir}.bw rna/WT_b_${dir}.bw rna/WT_c_${dir}.bw -bs 5 -m 1000 -p 10 -o manuscript/deeptools/matrix_${strand}_clustered_k${k}_genes.gz
		# printf "plotting heatmap for $dir RNA on $strand genes in $k clusters\n"
		# plotHeatmap -m manuscript/deeptools/matrix_${strand}_clustered_k${k}_genes.gz -out manuscript/plots/${strand}_clustered_k${k}_genes_heatmap.pdf --sortRegions keep --missingDataColor 0.8 --colorMap 'viridis' --startLabel "TSS" --endLabel "TES" --xAxisLabel "Gene" --samplesLabel "WT a" "WT b" "WT c" "MBD1/2/4 a" "MBD1/2/4 b" "MBD1/2/4 c" "MBD1/2/5/6 a" "MBD1/2/5/6 b" "MBD1/2/5/6 c" "MBD2/5/6 a" "MBD2/5/6 b" "MBD2/5/6 c" "SUVH1/3 a" "SUVH1/3 b" "SUVH1/3 c" --regionsLabel $clustername
		# printf "plotting profiles for $dir RNA on $strand genes in $k clusters\n"
		# plotProfile -m manuscript/deeptools/matrix_${strand}_clustered_k${k}_genes.gz -out manuscript/plots/${strand}_clustered_k${k}_genes_profile_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "TSS" --endLabel "TES" --samplesLabel ${mutlabels[@]} --yAxisLabel "median $dir RNA" --colors ${colors[@]} --perGroup
		# plotProfile -m manuscript/deeptools/matrix_${strand}_clustered_k${k}_genes.gz -out manuscript/plots/${strand}_clustered_k${k}_genes_profile_mean.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "TSS" --endLabel "TES" --samplesLabel ${mutlabels[@]} --yAxisLabel "mean $dir RNA" --colors ${colors[@]} --perGroup
	# done
# done


#######################################################################################################################################################################
####### To find a better way of grouping peaks (v2_ prefix)

#### To create a bedGraph file for each sample to check overall ChIP enrichment vs Input

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# # printf "\nbamCompare readCount for $sample\n"
	# # bamCompare -b1 manuscript/${sample}_HA.bam -b2 manuscript/${sample}_INPUT.bam -o manuscript/compare_readCount_log2FC_${sample}.bedGraph -of "bedgraph" --scaleFactorsMethod "readCount" --operation "log2" -bs 10 -p 8
	# # printf "\nbamCompare CPM for $sample\n"
	# # bamCompare -b1 manuscript/${sample}_HA.bam -b2 manuscript/${sample}_INPUT.bam -o manuscript/compare_CPM_log2FC_${sample}.bedGraph -of "bedgraph" --scaleFactorsMethod "None" --normalizeUsing "CPM" --operation "log2" -bs 10 -p 8
	# printf "\nbamCompare CPM ratio for $sample\n"
	# bamCompare -b1 manuscript/${sample}_HA.bam -b2 manuscript/${sample}_INPUT.bam -o manuscript/compare_CPM_ratio_${sample}.bedGraph -of "bedgraph" --scaleFactorsMethod "None" --normalizeUsing "CPM" --operation "ratio" -bs 10 -p 8
	# printf "\nbamCompare CPM reciprocal ratio for $sample\n"
	# bamCompare -b1 manuscript/${sample}_HA.bam -b2 manuscript/${sample}_INPUT.bam -o manuscript/compare_CPM_rec_ratio_${sample}.bedGraph -of "bedgraph" --scaleFactorsMethod "None" --normalizeUsing "CPM" --operation "reciprocal_ratio" -bs 10 -p 8
# done

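#### To check enrichment of each candidate ChIP at each merged peak to call the groups
#### Per peak, the intersect/merge steps compute sum($7*$8)/sum($8): an overlap-weighted mean of the bedGraph value,
#### provided peaks_merged.bed has exactly 3 columns (TODO confirm the column layout).
#### NOTE: the input compare_CPM_log2FC_<sample>.bedGraph is produced by the "bamCompare CPM" call, which is doubly commented out in the previous section.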

# printf "Sample\tPeaks (log2FC>0.1)\tPeaks (log2FC>0.2)\n" > manuscript/summary_overlap.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do	
	# # case "$sample" in
		# # MBD1) 	n=0.2;;
		# # MBD2)	n=0.2;;
		# # MBD4)	n=0.2;;
		# # MBD5)	n=0.2;;
		# # MBD6)	n=0.2;;
		# # SUVH1)	n=0.2;;
		# # SUVH3)	n=0.2;;
	# # esac
	# printf "\nintersect $sample with merged peak file\n"
	# bedtools intersect -a manuscript/peaks_merged.bed -b manuscript/compare_CPM_log2FC_${sample}.bedGraph -wao | awk -v OFS="\t" '{print $1,$2,$3,$7*$8,$8}' > manuscript/temp_overlap_merged_peaks_${sample}.txt
	# bedtools merge -i manuscript/temp_overlap_merged_peaks_${sample}.txt -c 4,5 -o sum,sum | awk -v OFS="\t" '{m=$4/$5; print $1,$2,$3,m}' > manuscript/overlap_merged_peaks_${sample}.txt
		
	# awk -v OFS="\t" -v s=$sample '{if ($4 >= 0.1) a+=1; if ($4 >= 0.2) b+=1} END {print s,a,b}' manuscript/overlap_merged_peaks_${sample}.txt >> manuscript/summary_overlap.txt
	# awk -v OFS="\t" -v s=$sample '{print $4}' manuscript/overlap_merged_peaks_${sample}.txt > manuscript/overlap_${sample}_col.txt
# done

# rm manuscript/temp*.txt	
	
# printf "Chr\tStart\tStop\tMBD1\tMBD2\tMBD4\tMBD5\tMBD6\tSUVH1\tSUVH3\n" > manuscript/overlap_merged_peaks_table.txt
# paste manuscript/peaks_merged.bed manuscript/overlap_MBD1_col.txt manuscript/overlap_MBD2_col.txt manuscript/overlap_MBD4_col.txt manuscript/overlap_MBD5_col.txt manuscript/overlap_MBD6_col.txt manuscript/overlap_SUVH1_col.txt manuscript/overlap_SUVH3_col.txt >> manuscript/overlap_merged_peaks_table.txt

# printf "Count\n" > manuscript/overlap_count_col.txt
# awk 'NR>1 {a=0; if ($4>=0.2) a+=1; if ($5>=0.2) a+=1; if ($6>=0.2) a+=1; if ($7>=0.2) a+=1; if ($8>=0.2) a+=1; if ($9>=0.2) a+=1; if ($10>=0.2) a+=1; print a;}' manuscript/overlap_merged_peaks_table.txt >> manuscript/overlap_count_col.txt

# paste manuscript/overlap_merged_peaks_table.txt manuscript/overlap_count_col.txt > manuscript/overlap_merged_peaks_table_vf.txt

# head manuscript/overlap_merged_peaks_table_vf.txt

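#### To split peaks based on enrichment of specific candidates
#### Columns of overlap_merged_peaks_table_vf.txt: 4=MBD1 5=MBD2 6=MBD4 7=MBD5 8=MBD6 9=SUVH1 10=SUVH3, 11=number of candidates with a value >= 0.2
#### The else-if chain is order-dependent: each peak is appended to the file of the first rule it satisfies.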

# rm manuscript/v2_*_only_peaks.bed
# awk 'NR>1 {if ($11>=6) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_all_candidates_only_peaks.bed"; 
# else if ($7>=0.2 && $8>=0.2 && $9>=0.2 && $10>=0.2 && $11>=4) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($5>=0.2 && $7>=0.2 && $8>=0.2 && $11>=3) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD2_MBD5_MBD6_only_peaks.bed"; 
# else if ($9>=0.2 && $10>=0.2 && $11>=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($7>=0.2 && $8>=0.2 && $11>=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_only_peaks.bed"; 
# else if ($9>=0.2 && $11<=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($10>=0.2 && $11<=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($5>=0.2 && $11<=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD2_MBD5_MBD6_only_peaks.bed"; 
# else if ($7>=0.2 && $11<=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_only_peaks.bed"; 
# else if ($8>=0.2 && $11<=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_only_peaks.bed"; 
# else if ($9>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($10>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_SUVH1_SUVH3_only_peaks.bed"; 
# else if ($7>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_only_peaks.bed";
# else if ($8>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD5_MBD6_only_peaks.bed";
# else if ($5>=0.2 && $4>=0.2 && $11>=2) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD1_MBD2_only_peaks.bed"; 
# else if ($4>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD1_only_peaks.bed"; 
# else if ($5>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD2_MBD5_MBD6_only_peaks.bed"; 
# else if ($6>=0.2 && $11>=1) printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_MBD4_only_peaks.bed";
# else printf $1"\t"$2"\t"$3"\n" >> "manuscript/v2_ungrouped_only_peaks.bed";}' manuscript/overlap_merged_peaks_table_vf.txt

# wc -l manuscript/v2_*_only*

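# #### To randomize the common peak file (shuffles v2_all_candidates_only_peaks.bed; the output v2_random_only_peaks.bed also matches the v2_*_only_peaks.bed glob used by the loops below)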

# awk -v OFS="\t" '$1~/^[1-9]/ {print $1,$3}' annotations/chromsize.bed > annotations/ath.genome
# bedtools shuffle -i manuscript/v2_all_candidates_only_peaks.bed -g annotations/ath.genome > manuscript/v2_random_only_peaks.bed

### To get the average size of peaks in each group

# printf "Group\tAverage_peak_size\n" > manuscript/v2_peak_size_average.txt
# for file in manuscript/v2_*_only_peaks.bed
# do
	# namext=${file##*/v2_}
	# name=${namext%_only_peaks.bed}
	# awk -v OFS="\t" -v n=$name '{a=$3-$2; b+=a} END {print n,b/NR}' $file >> manuscript/v2_peak_size_average.txt
# done

# cat manuscript/v2_peak_size_average.txt

# ### To plot a heatmap for each group of peaks

# for file in manuscript/v2_*_only_peaks.bed
# do
	# namext=${file##*/v2_}
	# name=${namext%_only_peaks.bed}
	# printf "Computing ChIP matrix for $name\n"
	# computeMatrix scale-regions -R ${file} -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/matrix_v2_${name}_chip.gz
	# lab=$(wc -l $file | awk -v ORS=" " -v n=$name '{printf n"("$1")"}')
	# printf "Plotting ChIP heatmap for $lab\n"
	# plotHeatmap -m manuscript/deeptools/matrix_v2_${name}_chip.gz -out manuscript/plots/v2_${name}_heatmap.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 12 --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'Oranges' 'Oranges' 'Oranges' 'Blues' 'Reds' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 mCG mCHG mCHH Genes TEs --regionsLabel ${lab} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 50 30 1 1 --whatToShow "heatmap and colorbar"
# done

# rm manuscript/temp*

# ####  To annotate grouped peaks

# printf "Group\tAnnotation\tNumber\n" > manuscript/v2_annotated_grouped_peaks.txt
# for file in manuscript/v2_*_peaks.bed
# do
	# namext=${file##*/v2_}
	# name=${namext%_only_peaks.bed}
	# printf "$name\n\n"
	# awk -v OFS="\t" -v n=$name '{print $1,$2,$3,n"_"NR,".","."}' $file > manuscript/deeptools/temp0_${name}.bed
	# annotatePeaks.pl manuscript/deeptools/temp0_${name}.bed tair10 -gff annotations/Araport11_GFF3_genes_transposons.201606.gff -annStats manuscript/deeptools/stats_${name}_peaks.txt > manuscript/deeptools/tempA_${name}.txt
	# awk -v OFS="\t" '(NR>1) && ($9=="Intergenic" || $9=="Chr") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="promoter-TSS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1b_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="TTS" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1c_${name}.bed
	# awk -v OFS="\t" '(NR>1) && ($9=="exon" || $9=="3'"'"'" || $9=="5'"'"'") {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1d_${name}.bed
	# awk -v OFS="\t" '(NR>1) && $9=="intron" {print $2,$3,$4,$1}' manuscript/deeptools/tempA_${name}.txt | sort -k1,1 -k2,2n > manuscript/deeptools/temp1e_${name}.bed
	# bedtools intersect -a manuscript/deeptools/temp1_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1b_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2b_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1c_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2c_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1d_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2d_${name}.txt
	# bedtools intersect -a manuscript/deeptools/temp1e_${name}.bed -b annotations/all_TE.gff -loj | sort -k1,1 -k2,2n > manuscript/deeptools/temp2e_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' manuscript/deeptools/temp2_${name}.txt > manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' manuscript/deeptools/temp2b_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' manuscript/deeptools/temp2c_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' manuscript/deeptools/temp2d_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' manuscript/deeptools/temp2e_${name}.txt >> manuscript/deeptools/temp3_${name}.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' manuscript/deeptools/temp3_${name}.txt > manuscript/deeptools/temp4_${name}.txt
	# sort -k1,1 -k2,2n manuscript/deeptools/temp4_${name}.txt | uniq | awk -v OFS="\t" '{print $4,$5}' | sort -k1,1 > manuscript/v2_annotated_peaks_${name}.txt
	# head manuscript/v2_annotated_peaks_${name}.txt
	# awk -v OFS="\t" '{print $2}' manuscript/v2_annotated_peaks_${name}.txt | sort | uniq -c | awk -v OFS="\t" -v n=$name '{print n,$2,$1}' >> manuscript/v2_annotated_grouped_peaks.txt
# done
# rm manuscript/deeptools/temp*

# #### To plot other marks on these peaks

# rm manuscript/deeptools/regions_labels.txt
# for file in manuscript/v2_*_only_peaks.bed
# do
	# namext=${file##*/v2_}
	# name=${namext%_only_peaks.bed}
	# wc -l ${file} | awk -v ORS=" " -v n=$name '{printf n"("$1") "}' >> manuscript/deeptools/regions_labels.txt
# done
# clustername=$(cat manuscript/deeptools/regions_labels.txt)

# printf "Computing ChIP matrix\n"
# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 50 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_chip.gz
# printf "Plotting ChIP heatmap\n"
# plotHeatmap -m manuscript/deeptools/v2_matrix_grouped_peaks_chip.gz -out manuscript/plots/v2_heatmap_grouped_peaks_chip.pdf --sortRegions descend --sortUsing mean --sortUsingSamples 12 --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'Oranges' 'Oranges' 'Oranges' 'Blues' 'Reds' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Peak" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 mCG mCHG mCHH Genes TEs --regionsLabel ${clustername} --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 50 30 1 1 --whatToShow "heatmap and colorbar"

# printf "Computing ChIP matrix\n"
# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_chip2.gz
# printf "plotting regions profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_chip2.gz -out manuscript/plots/v2_profile_grouped_peaks_chip.pdf --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 WT --plotType 'lines' --averageType 'mean' --startLabel "Peak" --endLabel "" --yAxisLabel "ChIP" --perGroup --regionsLabel $clustername

# for context in CG CHG CHH
# do
	# case $context in
		# CG) ymax=100
			# ymin=-1
			# ylab="mCG(%)";;
		# CHG) ymax=50
			# ymin=-0.5
			# ylab="mCHG(%)";;
		# CHH) ymax=20
			# ymin=-0.2
			# ylab="mCHH(%)";;
	# esac
	# printf "computing $context methylation matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S methyl/${context}_WT_seedlings.bw methyl/${context}_mbd1_2_4.bw methyl/${context}_mbd1_2_5_6.bw methyl/${context}_mbd2_5_6.bw methyl/${context}_SUVH1_SUVH3.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_m${context}.gz
	# printf "plotting $context methylation profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_m${context}.gz -out manuscript/plots/v2_profile_grouped_peaks_m${context}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel $ylab --perGroup --yMin 0 --yMax $ymax
# done
	
# printf "computing rna matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S rna/WT_sum.bw rna/mbd1_2_4_sum.bw rna/mbd1_2_5_6_sum.bw rna/mbd2_5_6_sum.bw rna/suvh1_3_sum.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_rna.gz
# printf "plotting rna profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_rna.gz -out manuscript/plots/v2_profile_grouped_peaks_rna_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "sum RNA replicates" --perGroup
	
# for mark in H3 H2AKub H3K4me1 H3K27me3 H3K9me2
# do
	# printf "computing $mark matrix for peaks groups\n"
	# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S histones/WT_${mark}.bw histones/MBD1_2_4_${mark}.bw histones/MBD1_2_5_6_${mark}.bw histones/MBD2_5_6_${mark}.bw histones/SUVH1_3_${mark}.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_${mark}.gz
	# printf "plotting $mark profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_${mark}.gz -out manuscript/plots/v2_profile_grouped_peaks_${mark}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --yMin -0.5 --yMax 0.5
	# printf "plotting $mark profiles for peaks groups\n"
	# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_${mark}.gz -out manuscript/plots/v2_profile_grouped_peaks_${mark}_median.pdf --plotType 'lines' --averageType 'median' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel WT MBD1/2/4 MBD1/2/5/6 MBD2/5/6 SUVH1/3 --yAxisLabel "$mark" --perGroup --yMin -0.5 --yMax 0.5
# done

# printf "computing CG density matrix for peaks groups\n"
# computeMatrix scale-regions -R manuscript/v2_*_only_peaks.bed -S annotations/CG_density.bw -bs 20 -b 2000 -a 2000 -m 300 -p 10 -o manuscript/deeptools/v2_matrix_grouped_peaks_cgdensity.gz --missingDataAsZero
# printf "plotting CG density profiles for peaks groups\n"
# plotProfile -m manuscript/deeptools/v2_matrix_grouped_peaks_cgdensity.gz -out manuscript/plots/v2_profile_grouped_peaks_cgdensity.pdf --plotType 'lines' --averageType 'mean' --regionsLabel $clustername --startLabel "Peak" --endLabel "" --samplesLabel "CG density" --yAxisLabel "CG density" --perGroup


###############################################################################################################################################################################

# # ####  To create simple annotation files

# awk -v OFS="\t" 'NR>1 && $3=="gene" {print $1,$4+1,$5,$6,$7,$9}' annotations/TAIR10_GFF3_genes_transposons.gff | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | sed 's/Chr//' | awk -v OFS="\t" '$1~/[1-9]/ {print $1,$2,$3,$7,$4,$5}' > manuscript/annotations/genes.bed

# awk -v OFS="\t" 'NR>1 && $3=="transposable_element_gene" {print $1,$4+1,$5,$6,$7,$9}' annotations/TAIR10_GFF3_genes_transposons.gff | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | sed 's/Chr//' | awk -v OFS="\t" '$1~/[1-9]/ {print $1,$2,$3,$7,$4,$5}' > manuscript/annotations/te_genes.bed

# awk -v OFS="\t" 'NR>1 && $3=="transposable_element" {print $1,$4+1,$5,$6,$7,$9}' annotations/TAIR10_GFF3_genes_transposons.gff | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | sed 's/Chr//' | awk -v OFS="\t" '$1~/[1-9]/ {print $1,$2,$3,$7,$4,$5}' > manuscript/annotations/tes.bed

# awk -v OFS="\t" 'NR>1 && $3=="pseudogene" {print $1,$4+1,$5,$6,$7,$9}' annotations/TAIR10_GFF3_genes_transposons.gff | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | sed 's/Chr//' | awk -v OFS="\t" '$1~/[1-9]/ {print $1,$2,$3,$7,$4,$5}' > manuscript/annotations/pseudogenes.bed

# cat manuscript/annotations/pseudogenes.bed manuscript/annotations/te_genes.bed | sort -k1,1n -k2,2n > manuscript/annotations/other_genes.bed

# ##### To annotate peaks

# bedtools intersect -wa -a manuscript/annotations/tes.bed -b manuscript/peaks_merged.bed | uniq > manuscript/tes_with_peaks.bed
# bedtools intersect -wa -wb -a manuscript/peaks_merged.bed -b manuscript/annotations/tes.bed | awk -v OFS="\t" '{print $1,$2,$3,$4,$8}' > manuscript/peaks_in_tes.bed
# bedtools intersect -wa -v -a manuscript/peaks_merged.bed -b manuscript/peaks_in_tes.bed > manuscript/peaks_not_in_tes.bed

# bedtools intersect -wa -a manuscript/annotations/genes.bed -b manuscript/peaks_not_in_tes.bed | uniq > manuscript/genes_with_peaks.bed
# bedtools intersect -wa -wb -a manuscript/peaks_not_in_tes.bed -b manuscript/annotations/genes.bed | awk -v OFS="\t" '{print $1,$2,$3,$4,$8}' > manuscript/peaks_in_genes.bed
# bedtools intersect -wa -v -a manuscript/peaks_not_in_tes.bed -b manuscript/peaks_in_genes.bed | uniq > manuscript/peaks_not_in_genes_or_tes.bed

# bedtools intersect -wa -a manuscript/annotations/other_genes.bed -b manuscript/peaks_not_in_genes_or_tes.bed > manuscript/other_genes_with_peaks.bed
# bedtools intersect -wa -wb -a manuscript/peaks_not_in_genes_or_tes.bed -b manuscript/annotations/other_genes.bed | awk -v OFS="\t" '{print $1,$2,$3,$4,$8}' > manuscript/peaks_in_other_genes.bed
# bedtools intersect -wa -v -a manuscript/peaks_not_in_genes_or_tes.bed -b manuscript/peaks_in_other_genes.bed | uniq > manuscript/peaks_in_intergenic.bed

# wc -l manuscript/peaks_merged.bed | awk '{print $1" peaks in total"}' > manuscript/simple_peak_stats.txt
# for file in manuscript/peaks_in_tes.bed manuscript/peaks_in_genes.bed manuscript/peaks_in_other_genes.bed
# do
	# namext=${file##*/peaks_in_}
	# name=${namext%.bed}
	# l=$(wc -l manuscript/${name}_with_peaks.bed | awk '{print $1}')
	# case "$name" in 
		# tes) m=$(wc -l manuscript/peaks_not_in_tes.bed | awk '{print $1}');;
		# genes) m=$(wc -l manuscript/peaks_not_in_genes_or_tes.bed | awk '{print $1}');;
		# other_genes) m=$(wc -l manuscript/peaks_in_intergenic.bed | awk '{print $1}');;
	# esac
	# awk '{print $4}' $file | sort | uniq | wc -l | awk -v n=$name -v l=$l -v m=$m '{print $1" peaks in "l" "n" ("m" peaks remaining)"}' >> manuscript/simple_peak_stats.txt
# done

# cat manuscript/simple_peak_stats.txt

# #### To plot with deeptools

# ### for bound genes
# printf "\nComputing matrix for genes with peaks pass1\n"
# computeMatrix scale-regions -R manuscript/genes_with_peaks.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD5.bw manuscript/SUVH3.bw -bs 20 -b 0 -a 0 -p 10 -o manuscript/deeptools/matrix_genes_with_peaks_pass1.gz
	
# for cluster in 3 4 5
# do
	# printf "\nPlotting ChIP heatmap for genes with peaks in $cluster clusters pass1\n"
	# plotHeatmap -m manuscript/deeptools/matrix_genes_with_peaks_pass1.gz -out manuscript/plots/001_heatmap_genes_with_peaks_k${cluster}_pass1.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' 'Oranges' 'Oranges' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD5 SUVH3 --zMin -1.5 -1.5 0 --zMax 1.5 1.5 30 --whatToShow "heatmap and colorbar" --hclust $cluster --outFileSortedRegions manuscript/deeptools/regions_genes_with_peaks_k${cluster}.bed
		
	# rm manuscript/deeptools/regions_genes_with_peaks_k${cluster}_labels.txt
	# for (( i=1; i<=$cluster; i++ )) 
	# do 
		# clust=$(printf "cluster_${i}")
		# awk -v ORS=" " -v c=$clust '$13==c {n=n+1} END {print c"("n") "}' manuscript/deeptools/regions_genes_with_peaks_k${cluster}.bed >> manuscript/deeptools/regions_genes_with_peaks_k${cluster}_labels.txt
	# done
	# clustername=$(cat manuscript/deeptools/regions_genes_with_peaks_k${cluster}_labels.txt)
		
	# printf "\nComputing matrix for genes with peaks pass2\n"
	# computeMatrix scale-regions -R manuscript/deeptools/regions_genes_with_peaks_k${cluster}.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 0 -a 0 -m 2000 -p 10 -o manuscript/deeptools/matrix_genes_with_peaks_pass2.gz
	# printf "\nPlotting ChIP heatmap for $annot peaks in $cluster clusters\n"
	# plotHeatmap -m manuscript/deeptools/matrix_genes_with_peaks_pass2.gz -out manuscript/plots/000_heatmap_genes_with_peaks_k${cluster}.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'Oranges' 'Oranges' 'Oranges' 'Blues' 'Reds' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "Genes" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 mCG mCHG mCHH Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 50 30 1 1 --whatToShow "heatmap and colorbar" --regionsLabel $clustername
# done

#### for "manual" clustering of genes

# printf "\nComputing matrix for genes with peaks for manual curating\n"
# computeMatrix scale-regions -R manuscript/genes_with_peaks.bed -S methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw -bs 20 -b 0 -a 0 -m 1000 -p 10 -o manuscript/deeptools/matrix_genes_with_peaks.gz --skipZeros
	
# printf "\nPlotting heatmap for genes with peaks for manual curating\n"
# plotHeatmap -m manuscript/deeptools/matrix_genes_with_peaks.gz -out manuscript/plots/002_heatmap_genes_with_peaks.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' 'Oranges' 'Oranges' --interpolationMethod bilinear --startLabel "" --endLabel "" --xAxisLabel "Genes" --samplesLabel CG CHG CHH --zMin -1.5 -1.5 0 --zMax 1.5 1.5 30 --whatToShow "heatmap and colorbar" --outFileNameMatrix manuscript/deeptools/matrix_genes_with_peaks.txt

# awk 'NR==4' manuscript/deeptools/matrix_genes_with_peaks.txt

# ### for tes

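# ### To split peaks located in TEs by their distance to the closest gene:
# ### distance strictly between -2000 and 2000 -> the gene record is written to temp_genes_with_te_peaks.bed;
# ### otherwise -> the peak is written to peaks_in_tes_far_from_genes.bed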

# bedtools closest -a manuscript/peaks_in_tes.bed -b manuscript/annotations/genes.bed -D b -t first | awk -v OFS="\t" '{if ($12<2000 && $12>-2000) print $6,$7,$8,$9,$10,$11>"manuscript/temp_genes_with_te_peaks.bed"; else print $1,$2,$3,$4,".",".">"manuscript/peaks_in_tes_far_from_genes.bed"}'

# sort -k1,1n -k2,2n manuscript/temp_genes_with_te_peaks.bed | uniq > manuscript/genes_with_te_peaks.bed

# bedtools intersect -wa -a manuscript/annotations/tes.bed -b manuscript/peaks_in_tes_far_from_genes.bed | uniq > manuscript/tes_with_peaks_far_from_genes.bed
# bedtools merge -i manuscript/tes_with_peaks_far_from_genes.bed -d 500 > manuscript/regions_with_peaks_far_from_genes.bed

# bedtools intersect -v -wa -a manuscript/genes_with_te_peaks.bed -b manuscript/genes_with_peaks.bed > manuscript/genes_with_te_peaks_only.bed

# awk '{a=$3-$2; if (a<1000) print $0>"manuscript/genes_with_te_peaks_only_short.bed"; else print $0>"manuscript/genes_with_te_peaks_only_long.bed"}' manuscript/genes_with_te_peaks_only.bed

# awk '{a=$3-$2; if (a<1000) print $0>"manuscript/regions_with_peaks_far_from_genes_short.bed"; else print $0>"manuscript/regions_with_peaks_far_from_genes_long.bed"}' manuscript/regions_with_peaks_far_from_genes.bed

# printf "\nComputing matrix for long tes with peaks\n"
# computeMatrix scale-regions -R manuscript/genes_with_te_peaks_only.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 2000 -a 2000 -m 2000 -p 10 -o manuscript/deeptools/matrix_genes_with_te_peaks_only.gz
# printf "\nPlotting ChIP heatmap for long tes with peaks\n"
# plotHeatmap -m manuscript/deeptools/matrix_genes_with_te_peaks_only.gz -out manuscript/plots/000_heatmap_genes_with_te_peaks_only.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'Oranges' 'Oranges' 'Oranges' 'Blues' 'Reds' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "TE" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 mCG mCHG mCHH Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 50 30 1 1 --whatToShow "heatmap and colorbar"

# printf "\nComputing matrix for short tes with peaks\n"
# computeMatrix scale-regions -R manuscript/regions_with_peaks_far_from_genes.bed -S manuscript/MBD1.bw manuscript/MBD2.bw manuscript/MBD4.bw manuscript/MBD5.bw manuscript/MBD6.bw manuscript/SUVH1.bw manuscript/SUVH3.bw manuscript/WT.bw methyl/CG_WT_seedlings.bw methyl/CHG_WT_seedlings.bw methyl/CHH_WT_seedlings.bw manuscript/all_genes.bw manuscript/all_TEs.bw -bs 20 -b 1000 -a 2000 -m 2000 -p 10 -o manuscript/deeptools/matrix_regions_with_peaks_far_from_genes.gz
# printf "\nPlotting ChIP heatmap for long tes with peaks\n"
# plotHeatmap -m manuscript/deeptools/matrix_regions_with_peaks_far_from_genes.gz -out manuscript/plots/000_heatmap_regions_with_peaks_far_from_genes.pdf --sortRegions descend --sortUsing mean --missingDataColor 0.8 --colorMap 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'viridis' 'Oranges' 'Oranges' 'Oranges' 'Blues' 'Reds' --interpolationMethod nearest --startLabel "" --endLabel "" --xAxisLabel "TE" --samplesLabel MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3 Col0 mCG mCHG mCHH Genes TEs --zMin -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 -1.5 0 0 0 0 0 --zMax 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 100 50 30 1 1 --whatToShow "heatmap and colorbar"


#####################################################################################################################
################################################# OLD CODE NOT USED #################################################
#####################################################################################################################

##### To calculate the final peak stat table
### (disabled) Writes manuscript/Stats_Total.txt: a header line, then one tab-separated
### row per sample comparing the peaks called on the merged library
### (manuscript/peaks_<sample>.bed) with the peaks called on each replicate
### (manuscript/individual/peaks_<sample>_rep1.bed and _rep2.bed).
### Percentages are relative to the merged peak set: peak percentages divide by the
### merged peak count, bp percentages divide by the summed merged peak length.

# printf "Sample\tNbPeaks_Merged\tNbPeaks_rep1\tNbPeaks_Overlap_rep1\tPercentPeaks_Overlap_rep1(vsMerged)\tPercentBp_Overlap_rep1(vsMerged)\tNbPeaks_rep2\tNbPeaks_Overlap_rep2\tPercentPeaks_Overlap_rep2(vsMerged)\tPercentBp_Overlap_rep2(vsMerged)\tNbPeaks_MergedOnly\tPercentPeaks_MergedOnly(vsMerged)\tPercentBp_MergedOnly(vsMerged)\n" > manuscript/Stats_Total.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	### Peak counts = number of lines in each BED file.
	# peakmerged=$(wc -l manuscript/peaks_${sample}.bed | awk '{print $1}')
	# peakrep1=$(wc -l manuscript/individual/peaks_${sample}_rep1.bed | awk '{print $1}')
	# peakrep2=$(wc -l manuscript/individual/peaks_${sample}_rep2.bed | awk '{print $1}')
	### Overlap counts = number of lines output by bedtools intersect.
	### NOTE(review): without -u this presumably yields one line per merged/replicate
	### overlap, so a merged peak spanning two replicate peaks would be counted twice
	### and the percentage could exceed 100 - confirm against the bedtools docs.
	# overlaprep1=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | wc -l | awk '{print $1}')
	# overlaprep2=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep2.bed | wc -l | awk '{print $1}')
	### Merged peaks overlapping neither replicate (two chained -v filters).
	# overlapmergedonly=$(bedtools intersect -v -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | bedtools intersect -v -a - -b manuscript/individual/peaks_${sample}_rep2.bed | wc -l | awk '{print $1}')
	### Same four quantities expressed in bp: sum of (end - start) over the records.
	# lenmerged=$(awk '{l=$3-$2; s+=l} END {print s}' manuscript/peaks_${sample}.bed)
	# lenrep1=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | awk '{l=$3-$2; s+=l} END {print s}')
	# lenrep2=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep2.bed | awk '{l=$3-$2; s+=l} END {print s}')
	# lenmergedonly=$(bedtools intersect -v -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | bedtools intersect -v -a - -b manuscript/individual/peaks_${sample}_rep2.bed | awk '{l=$3-$2; s+=l} END {print s}')
	### Appends the row. x = merged peak count and y = merged bp are the denominators.
	### NOTE(review): an empty merged peak file gives x = 0 and an empty y, so awk would
	### stop on a division by zero - guard if this is ever re-enabled.
	# awk -v OFS="\t" -v s=$sample -v x=$peakmerged -v y=$lenmerged -v a=$peakrep1 -v b=$overlaprep1 -v c=$lenrep1 -v d=$peakrep2 -v e=$overlaprep2 -v f=$lenrep2 -v g=$overlapmergedonly -v h=$lenmergedonly 'BEGIN {print s,x,a,b,b/x*100"%",c/y*100"%",d,e,e/x*100"%",f/y*100"%",g,g/x*100"%",h/y*100"%"}' >> manuscript/Stats_Total.txt
# done

# ##### To calculate the final peak stat table v2
### (disabled) Variant of the table above, written to manuscript/Stats_Total2.txt.
### Differences visible in the commands:
###   - both replicate peak files are concatenated, sorted and merged into one union
###     set (manuscript/individual/merged_<sample>.bed piped through bedtools merge)
###   - "MergedOnly" = merged-library peaks that do not overlap that union
###   - an extra last column, NbPeaks_RepsOnly = union regions that do not overlap any
###     merged-library peak

# printf "Sample\tNbPeaks_Merged\tNbPeaks_rep1\tNbPeaks_Overlap_rep1\tPercentPeaks_Overlap_rep1(vsMerged)\tPercentBp_Overlap_rep1(vsMerged)\tNbPeaks_rep2\tNbPeaks_Overlap_rep2\tPercentPeaks_Overlap_rep2(vsMerged)\tPercentBp_Overlap_rep2(vsMerged)\tNbPeaks_MergedOnly\tPercentPeaks_MergedOnly(vsMerged)\tPercentBp_MergedOnly(vsMerged)\tNbPeaks_RepsOnly\n" > manuscript/Stats_Total2.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	### Peak counts = number of lines in each BED file.
	# peakmerged=$(wc -l manuscript/peaks_${sample}.bed | awk '{print $1}')
	# peakrep1=$(wc -l manuscript/individual/peaks_${sample}_rep1.bed | awk '{print $1}')
	# peakrep2=$(wc -l manuscript/individual/peaks_${sample}_rep2.bed | awk '{print $1}')
	### Overlap counts = number of lines output by bedtools intersect (no -u, so a
	### merged peak may presumably be counted once per overlapping replicate peak).
	# overlaprep1=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | wc -l | awk '{print $1}')
	# overlaprep2=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep2.bed | wc -l | awk '{print $1}')
	### Concatenated replicate peaks, sorted as input for bedtools merge.
	### NOTE(review): -k1,1n sorts the chromosome column numerically; bedtools merge needs
	### input grouped by chromosome, so this presumably relies on numeric chromosome
	### names - verify.
	# cat manuscript/individual/peaks_${sample}_rep*.bed | sort -k1,1n -k2,2n > manuscript/individual/merged_${sample}.bed
	### NOTE(review): the same "bedtools merge" runs four times below (two counts, two
	### lengths); its output could be stored once.
	# overlapmergedonly=$(bedtools merge -i manuscript/individual/merged_${sample}.bed | bedtools intersect -v -a manuscript/peaks_${sample}.bed -b - | wc -l | awk '{print $1}')
	# overlaprepsonly=$(bedtools merge -i manuscript/individual/merged_${sample}.bed | bedtools intersect -v -a - -b manuscript/peaks_${sample}.bed | wc -l | awk '{print $1}')
	### Lengths in bp: sum of (end - start) over the records.
	# lenmerged=$(awk '{l=$3-$2; s+=l} END {print s}' manuscript/peaks_${sample}.bed)
	# lenrep1=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | awk '{l=$3-$2; s+=l} END {print s}')
	# lenrep2=$(bedtools intersect -a manuscript/peaks_${sample}.bed -b manuscript/individual/peaks_${sample}_rep2.bed | awk '{l=$3-$2; s+=l} END {print s}')
	# lenmergedonly=$(bedtools merge -i manuscript/individual/merged_${sample}.bed | bedtools intersect -v -a manuscript/peaks_${sample}.bed -b - | awk '{l=$3-$2; s+=l} END {print s}')
	### NOTE(review): lenrepsonly is computed but never passed to the awk call below, and
	### the header has no matching column.
	# lenrepsonly=$(bedtools merge -i manuscript/individual/merged_${sample}.bed | bedtools intersect -v -a - -b manuscript/peaks_${sample}.bed | awk '{l=$3-$2; s+=l} END {print s}')
	### Appends the row. x (merged peak count) and y (merged bp) are the denominators, so
	### an empty merged peak file would make awk stop on a division by zero.
	# awk -v OFS="\t" -v s=$sample -v x=$peakmerged -v y=$lenmerged -v a=$peakrep1 -v b=$overlaprep1 -v c=$lenrep1 -v d=$peakrep2 -v e=$overlaprep2 -v f=$lenrep2 -v g=$overlapmergedonly -v h=$lenmergedonly -v i=$overlaprepsonly 'BEGIN {print s,x,a,b,b/x*100"%",c/y*100"%",d,e,e/x*100"%",f/y*100"%",g,g/x*100"%",h/y*100"%",i}' >> manuscript/Stats_Total2.txt
# done

##### To calculate the final peak stat table v3
### (disabled) Writes manuscript/Stats_Total3.txt as a per-peak presence table.
### For each sample, the merged-library peaks and both replicate peak sets are pooled
### and merged into union regions named Peak_1..Peak_N. Each union region then gets
### one row with three counts: how many times its name appears in the lists of regions
### overlapping the merged-library peaks, the rep1 peaks and the rep2 peaks.

# printf "Sample\tPeak\tMerged\tRep1\tRep2\n" > manuscript/Stats_Total3.txt

# for sample in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# printf "$sample\n"
	### Pool the three peak sets and merge them into union regions; column 4 is the
	### region name "Peak_<line number>".
	### NOTE(review): -k1,1n sorts the chromosome column numerically; presumably the
	### chromosome names are numeric, as bedtools merge needs chromosome-grouped input.
	# cat manuscript/peaks_${sample}.bed manuscript/individual/peaks_${sample}_rep*.bed | sort -k1,1n -k2,2n > manuscript/individual/all_${sample}.bed
	# bedtools merge -i manuscript/individual/all_${sample}.bed | awk -v OFS="\t" '{print $1,$2,$3,"Peak_"NR}' > manuscript/individual/all2_${sample}.bed
	### For each source, the names of the union regions it overlaps. uniq only drops
	### adjacent duplicates - presumably sufficient because intersect -wa keeps the
	### order of the -a file; confirm.
	# bedtools intersect -wa -a manuscript/individual/all2_${sample}.bed -b manuscript/individual/peaks_${sample}_rep1.bed | awk '{print $4}' | uniq > manuscript/individual/inter_${sample}_rep1.bed
	# bedtools intersect -wa -a manuscript/individual/all2_${sample}.bed -b manuscript/individual/peaks_${sample}_rep2.bed | awk '{print $4}' | uniq > manuscript/individual/inter_${sample}_rep2.bed	
	# bedtools intersect -wa -a manuscript/individual/all2_${sample}.bed -b manuscript/peaks_${sample}.bed | awk '{print $4}' | uniq > manuscript/individual/inter_${sample}_merged.bed
	### Walk over Peak_1..Peak_max and append one row per union region.
	### NOTE(review): three grep scans per peak make this loop quadratic in the number of
	### peaks; a single awk/join pass over the three inter_* files would give the same
	### table if this is ever re-enabled.
	# max=$(wc -l manuscript/individual/all2_${sample}.bed | awk '{print $1}')
	# i=1
	# while [ "$i" -le "$max" ]
	# do
		# printf "Peak $i\n"
		### -w keeps "Peak_1" from also matching "Peak_10", "Peak_11", ...
		# a=$(grep -w "Peak_$i" manuscript/individual/inter_${sample}_merged.bed | wc -l | awk '{print $1}')
		# b=$(grep -w "Peak_$i" manuscript/individual/inter_${sample}_rep1.bed | wc -l | awk '{print $1}')
		# c=$(grep -w "Peak_$i" manuscript/individual/inter_${sample}_rep2.bed | wc -l | awk '{print $1}')
		### Row layout matches the header: Sample, <sample>_Peak_<i>, Merged, Rep1, Rep2.
		# awk -v OFS="\t" -v s=$sample -v i=$i -v a=$a -v b=$b -v c=$c 'BEGIN {print s,s"_Peak_"i,a,b,c}' >> manuscript/Stats_Total3.txt
		# i=$((i+1))
	# done
# done








