#!/bin/bash
#$ -V
#$ -cwd
#$ -pe threads 12
#$ -l m_mem_free=2G
#$ -l tmp_free=4G
#$ -o LogmCreaders.log
#$ -j y
#$ -N mCreaders

# Abort on the first failing command; a pipeline fails if any of its stages fails.
set -e -o pipefail

# Print a timestamp, framed by blank lines, to the job's stdout.
printf '\n\n'
date
printf '\n'

# Thread count consumed as ${threads} by the multi-threaded steps further down
# (samtools, deepTools, ...). NSLOTS is provided by the scheduler for the slots
# requested with "#$ -pe threads"; fall back to a single thread when it is unset
# or empty (e.g. the script is launched directly with bash), otherwise ${threads}
# would expand to nothing and "-p ${threads}" would swallow the next argument.
export threads="${NSLOTS:-1}"

# ### Script to compare our ChIPseq to Jacobsen's for SUVH1/3, MBD2/5/6

# rm -f total_samplefile.txt

# # ## For our ChIPseq

# # NOTE: the disabled printf below must stay on ONE physical line; if it wraps, the tail loses its leading '#' and is executed as a command with an unbalanced quote.
# printf "TF_MBD1_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821957\tSRA\tSE\tTAIR10\nTF_MBD1_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821956\tSRA\tSE\tTAIR10\nTF_MBD1_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821955\tSRA\tSE\tTAIR10\nTF_MBD1_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821953\tSRA\tSE\tTAIR10\nTF_MBD2_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821952\tSRA\tSE\tTAIR10\nTF_MBD2_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821951\tSRA\tSE\tTAIR10\nTF_MBD2_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821950\tSRA\tSE\tTAIR10\nTF_MBD2_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821949\tSRA\tSE\tTAIR10\nTF_MBD4_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821948\tSRA\tSE\tTAIR10\nTF_MBD4_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821947\tSRA\tSE\tTAIR10\nTF_MBD4_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821946\tSRA\tSE\tTAIR10\nTF_MBD4_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821945\tSRA\tSE\tTAIR10\nTF_MBD5_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821944\tSRA\tSE\tTAIR10\nTF_MBD5_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821942\tSRA\tSE\tTAIR10\nTF_MBD5_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821941\tSRA\tSE\tTAIR10\nTF_MBD5_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821940\tSRA\tSE\tTAIR10\nTF_MBD6_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821939\tSRA\tSE\tTAIR10\nTF_MBD6_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821938\tSRA\tSE\tTAIR10\nTF_MBD6_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821937\tSRA\tSE\tTAIR10\nTF_MBD6_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821936\tSRA\tSE\tTAIR10\nTF_SUVH1_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821935\tSRA\tSE\tTAIR10\nTF_SUVH1_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821934\tSRA\tSE\tTAIR10\nTF_SUVH1_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821933\tSRA\tSE\tTAIR10\nTF_SUVH1_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821931\tSRA\tSE\tTAIR10\nTF_SUVH3_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821930\tSRA\tSE\tTAIR10\nTF_SUVH3_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821929\tSRA\tSE\tTAIR10\nTF_SUVH3_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821928\tSRA\tSE\tTAIR10\nTF_SUVH3_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821927\tSRA\tSE\tTAIR10\nTF_MBD2vsWT_Lister\tCol0\tseedlings\tIP\tRep1\tSRR27821952\tSRA\tSE\tTAIR10\nTF_MBD2vsWT_Lister\tCol0\tseedlings\tInput\tRep1\tSRR27821926\tSRA\tSE\tTAIR10\nTF_MBD2vsWT_Lister\tCol0\tseedlings\tIP\tRep2\tSRR27821950\tSRA\tSE\tTAIR10\nTF_MBD2vsWT_Lister\tCol0\tseedlings\tInput\tRep2\tSRR27821926\tSRA\tSE\tTAIR10\n" >> total_samplefile.txt

# # # ## For Jaco

# printf "TF_MBD1_Jaco\tCol0\tflowers\tIP\tRep1\tSRR25100656\tSRA\tPE\tTAIR10\nTF_MBD1_Jaco\tCol0\tflowers\tInput\tRep1\tSRR25100622\tSRA\tPE\tTAIR10\nTF_MBD1_Jaco\tCol0\tflowers\tIP\tRep2\tSRR25100655\tSRA\tPE\tTAIR10\nTF_MBD1_Jaco\tCol0\tflowers\tInput\tRep2\tSRR25100624\tSRA\tPE\tTAIR10\nTF_MBD2_Jaco\tCol0\tflowers\tIP\tRep1\tSRR25100660\tSRA\tPE\tTAIR10\nTF_MBD2_Jaco\tCol0\tflowers\tInput\tRep1\tSRR25100666\tSRA\tPE\tTAIR10\nTF_MBD2_Jaco\tCol0\tflowers\tIP\tRep2\tSRR25100662\tSRA\tPE\tTAIR10\nTF_MBD2_Jaco\tCol0\tflowers\tInput\tRep2\tSRR25100522\tSRA\tPE\tTAIR10\nTF_MBD4_Jaco\tCol0\tflowers\tIP\tRep1\tSRR25100623\tSRA\tPE\tTAIR10\nTF_MBD4_Jaco\tCol0\tflowers\tInput\tRep1\tSRR25100654\tSRA\tPE\tTAIR10\nTF_MBD4_Jaco\tCol0\tflowers\tIP\tRep2\tSRR25100619\tSRA\tPE\tTAIR10\nTF_MBD4_Jaco\tCol0\tflowers\tInput\tRep2\tSRR25100617\tSRA\tPE\tTAIR10\nTF_MBD5_Jaco\tCol0\tflowers\tIP\tRep1\tSRR13482974\tSRA\tPE\tTAIR10\nTF_MBD5_Jaco\tCol0\tflowers\tInput\tRep1\tSRR13482978\tSRA\tPE\tTAIR10\nTF_MBD5_Jaco\tCol0\tflowers\tIP\tRep2\tSRR13482979\tSRA\tPE\tTAIR10\nTF_MBD5_Jaco\tCol0\tflowers\tInput\tRep2\tSRR13482983\tSRA\tPE\tTAIR10\nTF_MBD6_Jaco\tCol0\tflowers\tIP\tRep1\tSRR13482976\tSRA\tPE\tTAIR10\nTF_MBD6_Jaco\tCol0\tflowers\tInput\tRep1\tSRR13482978\tSRA\tPE\tTAIR10\nTF_MBD6_Jaco\tCol0\tflowers\tIP\tRep2\tSRR13482981\tSRA\tPE\tTAIR10\nTF_MBD6_Jaco\tCol0\tflowers\tInput\tRep2\tSRR13482983\tSRA\tPE\tTAIR10\nTF_SUVH1_Jaco\tCol0\tflowers\tIP\tRep1\tSRR7727943\tSRA\tSE\tTAIR10\nTF_SUVH1_Jaco\tCol0\tflowers\tInput\tRep1\tSRR7727947\tSRA\tSE\tTAIR10\nTF_SUVH1_Jaco\tCol0\tflowers\tIP\tRep2\tSRR7727944\tSRA\tSE\tTAIR10\nTF_SUVH1_Jaco\tCol0\tflowers\tInput\tRep2\tSRR7727948\tSRA\tSE\tTAIR10\n" >> total_samplefile.txt

### To run the maizecode pipeline

# qsub scripts/Maizecode.sh -f total_samplefile -p /grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis

### To make coverage tracks for all the K9

# rm -f coverage_analysis_samplefile.txt
# while read data line tissue sample rep sampleID path paired ref
# do
	# folder="/grid/martienssen/home/jcahn/norepl/projects/readersmC/TF"
	# label=${data##TF_}
	# name=${label}_${sample}_${rep}
	# bam="${folder}/mapped/${line}_${name}.bam"
	# bw="${folder}/tracks/coverage_${name}.bw"
	# if [ ! -e ${bam}.bai ]; then
		# printf "Indexing ${bam}\n"
		# samtools index -@ ${threads} ${bam}
	# fi
	# if [ ! -e ${bw} ]; then
		# printf "Making ${bw}\n"
		# bamCoverage -b ${bam} -o ${bw} -bs 10 -p ${threads}
	# fi
	# ref="/grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis/TAIR10"
	# printf "${data}\t${line}\t${tissue}\t${sample}\t${name}\t${ref}\n" >> coverage_analysis_samplefile.txt
# done < coverage_samplefile.txt

# ### To do some heatmaps on all genes

# if [ ! -e analysis ]; then
	# mkdir analysis
	# mkdir analysis/plots
	# mkdir analysis/data
# fi

# cp TF/tracks/TAIR10_all_genes.bed analysis/data/TAIR10_all_genes.bed

# pathtobw="/grid/martienssen/home/jcahn/norepl/projects/readersmC/TF/tracks"

# sort -k1,1 total_analysis_samplefile.txt > sorted_analysis_samplefile.txt

# # for samp in sorted
# for samp in coverage
# do
	# samplefile="${samp}_analysis_samplefile.txt"
	# bwfiles=()
	# samplenames=()
	# orderedmark=()
	# colors=()
	# while read data line tissue sample paired ref_dir
	# do
		# label=${data##TF_}
		# name=${line}_${label}
		# bw="${pathtobw}/${name}_merged.bw"

		# samplenames+=("${label}")
		# bwfiles+=("${bw}")

		# case "${label}" in
			# *MBD1*)		color="black";;
			# *MBD2*) 	color="red";;
			# *MBD5*) 	color="blue";;
			# *MBD6*) 	color="purple";;
			# *SUVH1*) 	color="#a3514b";;
			# *SUVH3*) 	color="#a667a5";;
		# esac
		# case "${label}" in
			# *Jaco)		col="Blues";;
			# *Lister) 	col="Reds";;
		# esac		
		# linecolors+=("${color}")
		# colcolors+=("${col}")
	# done < ${samplefile}

	# # for target in all_genes
	# for target in GbM_genes pseudogenes unmethylated_genes
	# do
		# title="${target}_${samp}"
		# file="analysis/data/TAIR10_${target}.bed"
		# nb=$(wc -l ${file} | awk '{print $1}')
		# regionlab="${target} (${nb})"
		# param="-bs 10 -a 1000 -b 1000 -m 2000"
		# printf "\nComputing matrix for ${title}\n"
		# computeMatrix scale-regions -R ${file} -S ${bwfiles[@]} ${param} -p ${threads} -o analysis/data/matrix_${title}.gz	
		
		# printf "\nGetting Z scales for ${title}\n"
		# computeMatrixOperations dataRange -m analysis/data/matrix_${title}.gz > analysis/data/values_z_${title}.txt
		# zmins=()
		# zmaxs=()
		# for sample in ${samplenames[@]}
		# do
			# mini=$(grep ${sample} analysis/data/values_z_${title}.txt | awk '{print $5}')
			# maxi=$(grep ${sample} analysis/data/values_z_${title}.txt | awk '{print $6}')
	
			# zmins+=("${mini}")
			# zmaxs+=("${maxi}")
		# done
		# printf "\nGetting Y values for ${title}\n"
		# plotProfile -m analysis/data/matrix_${title}.gz -out analysis/plots/temp_${title}_profile.pdf --averageType mean --outFileNameData analysis/data/values_y_${title}.txt
		# rm -f data/plots/temp_${title}_profile.pdf
		# printf "\nGetting Y scales for ${title}\n"
		# ymins=()
		# ymaxs=()
		# for sample in ${samplenames[@]}
		# do
			# ymini=$(grep ${sample} analysis/data/values_y_${title}.txt | awk '{m=$3; for(i=3;i<=NF;i++) if ($i<m) m=$i; print m}' | awk 'BEGIN {m=99999} {if ($1<m) m=$1} END {if (m<0) a=m*1.2; else a=m*0.8; print a}')
			# ymaxi=$(grep ${sample} analysis/data/values_y_${title}.txt | awk '{m=$3; for(i=3;i<=NF;i++) if ($i>m) m=$i; print m}' | awk 'BEGIN {m=-99999} {if ($1>m) m=$1} END {if (m<0) a=m*0.8; else a=m*1.2; print a}')
			# ymins+=("${ymini}")
			# ymaxs+=("${ymaxi}")
		# done
		
		# printf "Plotting profile for ${title}\n"
		# plotProfile -m analysis/data/matrix_${title}.gz -out analysis/plots/profile_${title}.pdf --plotType 'lines' --averageType 'mean' --regionsLabel "${regionlab}" --samplesLabel ${samplenames[@]} --perGroup --colors ${linecolors[@]}
		# plotProfile -m analysis/data/matrix_${title}.gz -out analysis/plots/profile_${title}_ind.pdf --plotType 'lines' --averageType 'mean' --regionsLabel "${regionlab}" --samplesLabel ${samplenames[@]} --colors ${linecolors[@]} --yMin ${ymins[@]} --yMax ${ymaxs[@]}
		# printf "Plotting heatmap for ${title}\n"
		# plotHeatmap -m analysis/data/matrix_${title}.gz -out analysis/plots/heatmap_${title}.pdf --regionsLabel "${regionlab}" --samplesLabel ${samplenames[@]} --colorMap ${colcolors[@]} --zMin ${zmins[@]} --zMax ${zmaxs[@]} --yMin ${ymins[@]} --yMax ${ymaxs[@]}
	# done		
# done

# ### Plot sample correlations

# pathtobam="/grid/martienssen/home/jcahn/norepl/projects/readersmC/TF/mapped"
# pathtobw="/grid/martienssen/home/jcahn/norepl/projects/readersmC/TF/tracks"

# bamsamples=()
# bwsamples=()
# bamlabels=()
# bwlabels=()
# for prot in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "${prot}" in
		# MBD1)	color="grey";;
		# MBD2)	color="blue";;
		# MBD4)	color="green";;
		# MBD5)	color="purple";;
		# MBD6)	color="red";;
		# SUVH1)	color="orange";;
		# SUVH3)	color="yellow";;
	# esac
	# for rep in Rep1 Rep2
	# do	
		# if [ -e ${pathtobam}/Col0_${prot}_Lister_IP_${rep}.bam ]; then
			# bamsamples+=(${pathtobam}/Col0_${prot}_Lister_IP_${rep}.bam)
			# bwsamples+=(${pathtobw}/Col0_${prot}_Lister_${rep}.bw)
			# bamlabels+=("${prot}_Lister_${rep}")
			# bwlabels+=("${prot}_Lister_${rep}")
			# bwshape+=("s")
			# bwcolor+=("${color}")
		# fi
		# if [ -e ${pathtobam}/Col0_${prot}_Lister_Input_${rep}.bam ]; then
			# bamsamples+=(${pathtobam}/Col0_${prot}_Lister_Input_${rep}.bam)
			# bamlabels+=("${prot}_Lister_${rep}_Input")
		# fi
		# if [ -e ${pathtobam}/Col0_${prot}_Jaco_IP_${rep}.bam ]; then
			# bamsamples+=(${pathtobam}/Col0_${prot}_Jaco_IP_${rep}.bam)
			# bwsamples+=(${pathtobw}/Col0_${prot}_Jaco_${rep}.bw)
			# bamlabels+=("${prot}_Jaco_${rep}_Input")
			# bwlabels+=("${prot}_Jaco_${rep}")
			# bwshape+=("o")
			# bwcolor+=("${color}")
		# fi
		# if [ -e ${pathtobam}/Col0_${prot}_Jaco_Input_${rep}.bam ]; then
			# bamsamples+=(${pathtobam}/Col0_${prot}_Jaco_Input_${rep}.bam)
			# bamlabels+=("${prot}_Jaco_${rep}")
		# fi
	# done
	
# done
# ## to plot whole genome correlations

# # printf "Making BAM correlations on whole genome\n"
# # multiBamSummary bins -b ${bamsamples[@]} -o analysis/data/correlation_bam_genome.npz --labels ${bamlabels[@]} -p ${threads}
# # plotCorrelation --corData analysis/data/correlation_bam_genome.npz --corMethod spearman --whatToPlot heatmap --plotNumbers --plotFile analysis/plots/heatmap_bam_genome_spearman.pdf --plotTitle "Spearman correlation whole genome (bam)"
# printf "Making BW correlations on whole genome\n"
# # multiBigwigSummary bins -b ${bwsamples[@]} -o analysis/data/correlation_bw_genome.npz --labels ${bwlabels[@]} -p ${threads}
# # plotCorrelation --corData analysis/data/correlation_bw_genome.npz --corMethod spearman --whatToPlot heatmap --plotNumbers --plotFile analysis/plots/heatmap_bw_genome_spearman.pdf --plotTitle "Spearman correlation whole genome (bw)"
# # plotCorrelation --corData analysis/data/correlation_bw_genome.npz --corMethod pearson --whatToPlot heatmap --plotNumbers --plotFile analysis/plots/heatmap_bw_genome_pearson.pdf --plotTitle "Pearson correlation whole genome (bw)"
	
# plotPCA --corData analysis/data/correlation_bw_genome.npz --plotFile analysis/plots/PCA_bw_genome.pdf --plotTitle "PCA whole genome (bw)" --colors ${bwcolor[@]} --markers ${bwshape[@]}
	
# ## to plot correlations at GbM
# # bedfile=("analysis/data/TAIR10_GbM_genes.bed")
# # printf "Making BW correlations on GbM genes\n"
# # multiBigwigSummary BED-file --BED ${bedfile} -b ${bwsamples[@]} -o analysis/data/correlation_bw_GbM.npz --labels ${bwlabels[@]} -p ${threads}
# # plotCorrelation --corData analysis/data/correlation_bw_GbM.npz --corMethod spearman --whatToPlot heatmap --plotNumbers --plotFile analysis/plots/heatmap_bw_GbM_spearman.pdf --plotTitle "Spearman correlation GbM genes (bw)"
# # plotCorrelation --corData analysis/data/correlation_bw_GbM.npz --corMethod pearson --whatToPlot heatmap --plotNumbers --plotFile analysis/plots/heatmap_bw_GbM_pearson.pdf --plotTitle "Pearson correlation GbM genes (bw)"

# # plotPCA --corData analysis/data/correlation_bw_GbM.npz --plotFile analysis/plots/PCA_bw_GbM.pdf --plotTitle "PCA GbM genes (bw)" 

############################################################################################################################
### To do an Upset plot on the peaks, comparing MBDs with genes with/without mC and short and long TEs

# ref_dir="/grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis/TAIR10"

# nameout="upset_readers"

# tegff="/grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis/TAIR10_GFF3_genes_transposons.gff"
# awk -v OFS="\t" 'NR>1 && $3=="transposable_element" && ($5-$4>2000) {print $1,$4+1,$5,$6,$7,$9}' ${tegff} | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | awk -v OFS="\t" '{print $1,$2,$3,$7,$4,$5}' > analysis/data/TAIR10_long_TEs.bed
# awk -v OFS="\t" 'NR>1 && $3=="transposable_element" && ($5-$4<=2000) {print $1,$4+1,$5,$6,$7,$9}' ${tegff} | awk -F'[=;]' -v OFS="\t" '{print $1,$2}' | awk -v OFS="\t" '{print $1,$2,$3,$7,$4,$5}' > analysis/data/TAIR10_short_TEs.bed

# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6
# do
	# for lab in Jaco Lister
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# awk -v OFS="\t" -v s=${sample} '($1~/^[0-9]/ || $1~/^chr[0-9]/ || $1~/^Chr[0-9]/ ) {print $1,$2,$3,s}' ${file} | sort -k1,1 -k2,2n -u >> analysis/data/tmp_${nameout}.bed
	# done
# done

# sort -k1,1 -k2,2n analysis/data/tmp_${nameout}.bed > analysis/data/tmp2_${nameout}.bed
# bedtools merge -i analysis/data/tmp2_${nameout}.bed -c 4 -o distinct | bedtools sort -g ${ref_dir}/chrom.sizes | awk -v OFS="\t" '{print $1,$2,$3,"Peak_"NR,$4}'> analysis/data/v0_${nameout}_peaks_merged.bed

# rm -f analysis/data/tmp*

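# ## To annotate each merged peak with the first feature category it overlaps (long TEs, short TEs, GbM genes, unmethylated genes, pseudogenes, in that order); peaks overlapping none of them are labelled intergenic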

# i=0
# for type in long_TEs short_TEs GbM_genes unmethylated_genes pseudogenes
# do
	# j=$((i+1))
	# bedtools intersect -wa -a analysis/data/v${i}_${nameout}_peaks_merged.bed -b analysis/data/TAIR10_${type}.bed | awk -v OFS="\t" -v t=${type} '{print $1,$2,$3,$4,$5,t}' > analysis/data/temp${i}_${nameout}.bed
	# bedtools intersect -wa -v -a analysis/data/v${i}_${nameout}_peaks_merged.bed -b analysis/data/TAIR10_${type}.bed | awk -v OFS="\t" -v t=${type} '{print $1,$2,$3,$4,$5,"intergenic"}' > analysis/data/v${j}_${nameout}_peaks_merged.bed
	# i=$((i+1))
# done

# cat analysis/data/temp*_${nameout}.bed analysis/data/v5_${nameout}_peaks_merged.bed | sort -Vk4,4 > analysis/data/annotated_peaks_${nameout}.bed

# rm -f analysis/data/temp*_${nameout}.bed
# rm -f analysis/data/v*_${nameout}_peaks_merged.bed

# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6
# do
	# for lab in Jaco Lister
	# do
		# sample="${mbd}_${lab}"
		# printf "${sample}\n" > analysis/data/temp_col_${nameout}_${sample}.txt
		# awk -v OFS="\t" -v s=${sample} '{if ($0 ~ s) print "1"; else print "0"}' analysis/data/annotated_peaks_${nameout}.bed >> analysis/data/temp_col_${nameout}_${sample}.txt
	# done
# done

# awk -v OFS="\t" 'BEGIN {printf "PeakID\tGroup\n"} {print $4,$6}' analysis/data/annotated_peaks_${nameout}.bed > analysis/data/temp_col_${nameout}_AAA.txt

# paste analysis/data/temp_col_${nameout}_*.txt | uniq > analysis/data/matrix_${nameout}.txt
# rm -f analysis/data/temp_col_${nameout}_*.txt

# ## To make an Upset plot highlighting peaks in gene bodies
# nameout="upset_readers"

# printf "\nCreating Upset plot for ${nameout} with R version:\n"
# R --version
# Rscript --vanilla R_Upset_mCreaders.r ${nameout} analysis/data/matrix_${nameout}.txt

# ### To do an upset for each Lab

# ref_dir="/grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis/TAIR10"
# for lab in Jaco Lister
# do
	# nameout="upset_readers_${lab}"
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# if [ -e ${file} ]; then
			# awk -v OFS="\t" -v s=${sample} '($1~/^[0-9]/ || $1~/^chr[0-9]/ || $1~/^Chr[0-9]/ ) {print $1,$2,$3,s}' ${file} | sort -k1,1 -k2,2n -u >> analysis/data/tmp_${nameout}.bed
		# fi
	# done

	# sort -k1,1 -k2,2n analysis/data/tmp_${nameout}.bed > analysis/data/tmp2_${nameout}.bed
	# bedtools merge -i analysis/data/tmp2_${nameout}.bed -c 4 -o distinct | bedtools sort -g ${ref_dir}/chrom.sizes | awk -v OFS="\t" '{print $1,$2,$3,"Peak_"NR,$4}'> analysis/data/v0_${nameout}_peaks_merged.bed

	# rm -f analysis/data/tmp*

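	# ## To annotate each merged peak with the first feature category it overlaps (long TEs, short TEs, GbM genes, unmethylated genes, pseudogenes, in that order); peaks overlapping none of them are labelled intergenic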

	# i=0
	# for type in long_TEs short_TEs GbM_genes unmethylated_genes pseudogenes
	# do
		# j=$((i+1))
		# bedtools intersect -wa -a analysis/data/v${i}_${nameout}_peaks_merged.bed -b analysis/data/TAIR10_${type}.bed | awk -v OFS="\t" -v t=${type} '{print $1,$2,$3,$4,$5,t}' > analysis/data/temp${i}_${nameout}.bed
		# bedtools intersect -wa -v -a analysis/data/v${i}_${nameout}_peaks_merged.bed -b analysis/data/TAIR10_${type}.bed | awk -v OFS="\t" -v t=${type} '{print $1,$2,$3,$4,$5,"intergenic"}' > analysis/data/v${j}_${nameout}_peaks_merged.bed
		# i=$((i+1))
	# done

	# cat analysis/data/temp*_${nameout}.bed analysis/data/v5_${nameout}_peaks_merged.bed | sort -Vk4,4 > analysis/data/annotated_peaks_${nameout}.bed

	# rm -f analysis/data/temp*_${nameout}.bed
	# rm -f analysis/data/v*_${nameout}_peaks_merged.bed

	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# if [ -e TF/peaks/selected_peaks_Col0_${sample}.narrowPeak ]; then
			# printf "${sample}\n" > analysis/data/temp_col_${nameout}_${sample}.txt
			# awk -v OFS="\t" -v s=${sample} '{if ($0 ~ s) print "1"; else print "0"}' analysis/data/annotated_peaks_${nameout}.bed >> analysis/data/temp_col_${nameout}_${sample}.txt
		# fi
	# done

	# awk -v OFS="\t" 'BEGIN {printf "PeakID\tGroup\n"} {print $4,$6}' analysis/data/annotated_peaks_${nameout}.bed > analysis/data/temp_col_${nameout}_AAA.txt

	# paste analysis/data/temp_col_${nameout}_*.txt | uniq > analysis/data/matrix_${nameout}.txt
	# rm -f analysis/data/temp_col_${nameout}_*.txt

	# # To make an Upset plot highlighting peaks in gene bodies
	# printf "\nCreating Upset plot for ${nameout} with R version:\n"
	# R --version
	# Rscript --vanilla R_Upset_mCreaders.r ${nameout} analysis/data/matrix_${nameout}.txt

# done

############################################################################################################################
### To look at mC reader peaks in the chromatin states from Sequeira-Mendes et al. 2014. Plant Cell

# for i in {1..9}
# do
	# awk -v OFS="\t" -v i=${i} '{print "Chr"$0,"State_"i}' analysis/data/TAIR10_state${i}.txt >> analysis/data/temp_TAIR10_chromatin_states.txt
# done

# sort -k1,1 -k2,2n analysis/data/temp_TAIR10_chromatin_states.txt > analysis/data/TAIR10_chromatin_states.txt
# rm -f analysis/data/temp_TAIR10_chromatin_states.txt

# awk -v OFS="\t" '{print $4,$3-$2+1}' analysis/data/TAIR10_chromatin_states.txt | sort -k1,1 | awk -v OFS="\t" -v s="Genome" 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' > analysis/data/Readers_in_chromatin_states.txt
# for lab in Jaco Lister
# do
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# if [ -e ${file} ]; then
			 # bedtools intersect -wo -a TF/peaks/best_peaks_Col0_${sample}.bed -b analysis/data/TAIR10_chromatin_states.txt | awk -v OFS="\t" '{print $14,$15}' | sort -k1,1 | awk -v OFS="\t" -v s=${sample} 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' >> analysis/data/Readers_in_chromatin_states.txt
		# fi
	# done
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)

	# tab<-read.delim("analysis/data/Readers_in_chromatin_states.txt", header=FALSE, 
                # col.names=c("Reader","State","Coverage"))

	# plot<-ggplot(tab, aes(Reader, Coverage, fill=State)) +
		# geom_bar(stat = "identity", position = "fill") +
		# scale_fill_manual(values=c("State_1"="red","State_2"="salmon","State_3"="violet","State_4"="gold",
                             # "State_5"="grey","State_6"="brown","State_7"="green","State_8"="lightblue",
                             # "State_9"="blue")) +
		# theme_bw() +
		# theme(axis.text.x = element_text(size=10, angle=45, vjust = 1, hjust = 1),
			# axis.title.x = element_blank())
		
	# pdf(paste0("analysis/plots/Readers_in_chromatin_states.pdf"),8,8)
	# print(plot)
	# dev.off()

# EOF

############################################################################################################################

# pathtochip="/grid/martienssen/home/jcahn/norepl/projects/readersmC/TF/tracks"
# pathtomc="/grid/martienssen/home/jcahn/nlsas/projects/mC_DDM1/methylcall"

# bedfile="analysis/data/TAIR10_GbM_genes.bed"
# for lab in Jaco Lister
# do
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="Col0_${mbd}_${lab}"
		# file="${pathtochip}/${sample}_merged.bw"
		# if [ -e ${file} ]; then
			# for context in CG CHG CHH
			# do
				# printf "${sample} ${context}\n"
				# multiBigwigSummary bins -b ${file} ${pathtomc}/WT_A_${context}.bw -o analysis/data/${sample}_${context}.npz --labels ${sample} ${context} -p ${threads} --chromosomesToSkip "ChrC ChrM" --binSize 5000
				# plotCorrelation --corData analysis/data/${sample}_${context}.npz --corMethod pearson --whatToPlot scatterplot --plotFile analysis/plots/${sample}_${context}_pearson.pdf --plotTitle "${sample} vs WT ${context}" --removeOutliers
			# done
			
			# multiBigwigSummary BED-file --BED ${bedfile} -b ${file} ${pathtomc}/WT_A_CG.bw -o analysis/data/${sample}_GbM.npz --labels ${sample} mCG -p ${threads}
			# plotCorrelation --corData analysis/data/${sample}_GbM.npz --corMethod pearson --whatToPlot scatterplot --plotFile analysis/plots/${sample}_GbM_pearson.pdf --plotTitle "${sample} vs mCG at GbM (pearson)"
			# plotCorrelation --corData analysis/data/${sample}_GbM.npz --corMethod spearman --whatToPlot scatterplot --plotFile analysis/plots/${sample}_GbM_spearman.pdf --plotTitle "${sample} vs mCG at GbM (spearman)"
		# fi
	# done
# done

# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# sample1="Col0_${mbd}_Lister"
	# sample2="Col0_${mbd}_Jaco"
	# file1="${pathtochip}/${sample1}_merged.bw"
	# file2="${pathtochip}/${sample2}_merged.bw"
	# if [ -e ${file1} ] && [ -e ${file2} ]; then
		# printf "${mbd}\n"
		# multiBigwigSummary bins -b ${file1} ${file2} -o analysis/data/${mbd}.npz --labels "Lister" "Jaco" -p ${threads} --chromosomesToSkip "ChrC ChrM" --binSize 5000
		# plotCorrelation --corData analysis/data/${mbd}.npz --corMethod pearson --whatToPlot scatterplot --plotFile analysis/plots/${mbd}_pearson.pdf --plotTitle "${mbd} in Lister vs Jaco (pearson)"
		# plotCorrelation --corData analysis/data/${mbd}.npz --corMethod spearman --whatToPlot scatterplot --plotFile analysis/plots/${mbd}_spearman.pdf --plotTitle "${mbd} in Lister vs Jaco (spearman)"
	# fi
# done


############################################################################################################################
### To look at mC reader peaks in the chromatin states from PCSD Liu et al. 2018. NAR

# for i in {1..36}
# do
	# state="S${i}"
	# if [ ! -s analysis/data/PCSD_${state}.txt ]; then
		# wget https://systemsbiology.cau.edu.cn/chromstates/download/At_segments_${state} -O analysis/data/PCSD_${state}.txt
	# fi
	# sed -i 's/chr/Chr/' analysis/data/PCSD_${state}.txt
# done

# for i in {1..36}
# do
	# state="S${i}"
	# awk -v OFS="\t" -v i=${i} '{print $1,$2,$3,"State_"i}' analysis/data/PCSD_${state}.txt >> analysis/data/temp_PCSD_chromatin_states.txt
# done

# sort -k1,1 -k2,2n analysis/data/temp_PCSD_chromatin_states.txt > analysis/data/PCSD_chromatin_states.txt
# rm -f analysis/data/temp_PCSD_chromatin_states.txt

# awk -v OFS="\t" '{print $4,$3-$2+1}' analysis/data/PCSD_chromatin_states.txt | sort -k1,1 | awk -v OFS="\t" -v s="Genome" 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' > analysis/data/Readers_in_chromatin_states_PCSD.txt
# for lab in Jaco Lister
# do
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# if [ -e ${file} ]; then
			 # bedtools intersect -wo -a TF/peaks/best_peaks_Col0_${sample}.bed -b analysis/data/PCSD_chromatin_states.txt | awk -v OFS="\t" '{print $14,$15}' | sort -k1,1 | awk -v OFS="\t" -v s=${sample} 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' >> analysis/data/Readers_in_chromatin_states_PCSD.txt
		# fi
	# done
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)

	# tab<-read.delim("analysis/data/Readers_in_chromatin_states_PCSD.txt", header=FALSE, 
                # col.names=c("Reader","State","Coverage"))
	# tab$State<-factor(tab$State, levels = paste("State", 1:36, sep = "_"))

	# plot<-ggplot(tab, aes(Reader, Coverage, fill=State)) +
		# geom_bar(stat = "identity", position = "fill") +
		# theme_bw() +
		# theme(axis.text.x = element_text(size=10, angle=45, vjust = 1, hjust = 1),
			# axis.title.x = element_blank())
		
	# pdf(paste0("analysis/plots/Readers_in_chromatin_states_PCSD.pdf"),8,8)
	# print(plot)
	# dev.off()

# EOF


############################################################################################################################
### To look at mC reader peaks in the chromatin states from Jamge et al. 2023

# awk -v OFS="\t" '{print "Chr"$0}' analysis/data/AT_26_ChromHMM_ChromHMM_July_AT_segments.bed | sed 's/E/State_/' > analysis/data/AT26_chromatin_states.txt

# awk -v OFS="\t" '{print $4,$3-$2+1}' analysis/data/AT26_chromatin_states.txt | sort -k1,1 | awk -v OFS="\t" -v s="Genome" 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' > analysis/data/Readers_in_chromatin_states_AT26.txt
# for lab in Jaco Lister
# do
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# if [ -e ${file} ]; then
			 # bedtools intersect -wo -a TF/peaks/best_peaks_Col0_${sample}.bed -b analysis/data/AT26_chromatin_states.txt | awk -v OFS="\t" '{print $14,$15}' | sort -k1,1 | awk -v OFS="\t" -v s=${sample} 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' >> analysis/data/Readers_in_chromatin_states_AT26.txt
		# fi
	# done
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)
	# library(stringr)
	# library(RColorBrewer)

	# tab<-read.delim("analysis/data/Readers_in_chromatin_states_AT26.txt", header=FALSE, 
                # col.names=c("Reader","State","Coverage"))
	
	# cs=c(brewer.pal(7,"Reds")[7:2],brewer.pal(7,"BuPu")[2:7],brewer.pal(4,"Greys")[2:4],brewer.pal(3,"YlOrBr")[2:1],brewer.pal(9,"Greens"))

	# ## the order that is used in the paper
	# map_to_paper = data.frame(
		# from=c("23","25","24","22","21","26",
			# "1","3","2","4","5","6",
			# "18","19","20",
			# "7","8","9","10","11","12","13","14","17","15","16"),
		# to=factor(c("H1","H2","H3","H4","H5","H6",
				# "F1","F2","F3","F4","F5","F6",
				# "I1","I2","I3",
				# "E1","E2","E3","E4","E5","E6","E7","E8","E9","E10","E11"),
            # levels = c("E11","E10","E9","E8","E7","E6","E5","E4","E3","E2","E1",
                       # "I3","I2","I1",
                       # "F6","F5","F4","F3","F2","F1",
                       # "H6","H5","H4","H3","H2","H1")),
	# cols=cs)

	# names(cs)=map_to_paper$to # to get the right colors in the plots
							
	# tab<-mutate(tab,State=str_remove(State,"^State_"))%>%
		# left_join(map_to_paper,by=c("State"="from"))
	# tab<-select(tab,-State) %>%
		# mutate(State=to)
		
	# tab<-mutate(tab, State=factor(State,levels = c("H1","H2","H3","H4","H5","H6",
                                                           # "F1","F2","F3","F4","F5","F6" ,
                                                           # "I1","I2","I3",
                                                           # "E1","E2","E3","E4","E5","E6","E7","E8","E9","E10","E11" )))

	# plot<-ggplot(tab, aes(Reader, Coverage, fill=State)) +
		# geom_bar(stat = "identity", position = "fill") +
		# scale_fill_manual(values = cs) +
		# theme_bw() +
		# theme(axis.text.x = element_text(size=10, angle=45, vjust = 1, hjust = 1),
			# axis.title.x = element_blank())
		
	# pdf(paste0("analysis/plots/Readers_in_chromatin_states_AT26.pdf"),8,8)
	# print(plot)
	# dev.off()

# EOF


############################################################################################################################

## To generate random peaks for the Fig2A,C plots

### to load tair10 to homer (to do once)
# perl /grid/martienssen/home/jcahn/miniconda3/envs/default/share/homer/.//configureHomer.pl -install tair10

# ref_dir="/grid/martienssen/home/jcahn/nlsas/Genomes/Arabidopsis"

# awk '$0~/[1-9]$/' ${ref_dir}/TAIR10/chrom.sizes > analysis/data/TAIR10_chrom_sizes.txt
# awk -v OFS="\t" '$3~"transposable_element" {print $0}' ${ref_dir}/TAIR10_GFF3_genes_transposons.gff > analysis/data/TE.gff

# bedtools shuffle -noOverlapping -i TF/peaks/best_peaks_Col0_MBD2_Lister.bed -g analysis/data/TAIR10_chrom_sizes.txt | shuf | sort -k1,1 -k2,2n | awk -v OFS="\t" 'NR <= 2069 {print $1,$2,$3,"Random_"NR}' > analysis/data/random_peaks.bed

# printf "Sample\tAnnotations\tCount\n" > analysis/data/annotated_peaks_stats_merged.txt

# for sample in random MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
# do
	# case "${sample}" in 
		# random)	peakfile="analysis/data/random_peaks.bed";;
		# *)	peakfile="TF/peaks/best_peaks_Col0_${sample}_Lister.bed";;
	# esac
	# annotatePeaks.pl ${peakfile} tair10 -gff ${ref_dir}/TAIR10_GFF3_genes_transposons.gff > analysis/data/tempA_peaks.txt
	# awk -v OFS="\t" '(NR>1) && ($8=="Intergenic" || $8=="Chr") {print $2,$3,$4,$1}' analysis/data/tempA_peaks.txt | sort -k1,1 -k2,2n > analysis/data/temp1_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $8=="promoter-TSS" {print $2,$3,$4,$1}' analysis/data/tempA_peaks.txt | sort -k1,1 -k2,2n > analysis/data/temp1b_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $8=="TTS" {print $2,$3,$4,$1}' analysis/data/tempA_peaks.txt | sort -k1,1 -k2,2n > analysis/data/temp1c_peaks.bed
	# awk -v OFS="\t" '(NR>1) && ($8=="exon" || $8=="3'"'"'" || $8=="5'"'"'") {print $2,$3,$4,$1}' analysis/data/tempA_peaks.txt | sort -k1,1 -k2,2n > analysis/data/temp1d_peaks.bed
	# awk -v OFS="\t" '(NR>1) && $8=="intron" {print $2,$3,$4,$1}' analysis/data/tempA_peaks.txt | sort -k1,1 -k2,2n > analysis/data/temp1e_peaks.bed
	# bedtools intersect -a analysis/data/temp1_peaks.bed -b analysis/data/TE.gff -loj | sort -k1,1 -k2,2n > analysis/data/temp2.txt
	# bedtools intersect -a analysis/data/temp1b_peaks.bed -b analysis/data/TE.gff -loj | sort -k1,1 -k2,2n > analysis/data/temp2b.txt
	# bedtools intersect -a analysis/data/temp1c_peaks.bed -b analysis/data/TE.gff -loj | sort -k1,1 -k2,2n > analysis/data/temp2c.txt
	# bedtools intersect -a analysis/data/temp1d_peaks.bed -b analysis/data/TE.gff -loj | sort -k1,1 -k2,2n > analysis/data/temp2d.txt
	# bedtools intersect -a analysis/data/temp1e_peaks.bed -b analysis/data/TE.gff -loj | sort -k1,1 -k2,2n > analysis/data/temp2e.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE"; else print $1,$2,$3,$4,"Intergenic"}' analysis/data/temp2.txt > analysis/data/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_prom"; else print $1,$2,$3,$4,"Promoter"}' analysis/data/temp2b.txt >> analysis/data/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_TTS"; else print $1,$2,$3,$4,"TTS"}' analysis/data/temp2c.txt >> analysis/data/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_exon"; else print $1,$2,$3,$4,"Exon"}' analysis/data/temp2d.txt >> analysis/data/temp3.txt
	# awk -v OFS="\t" '{if ($7~"transposable_element") print $1,$2,$3,$4,"TE_intron"; else print $1,$2,$3,$4,"Intron"}' analysis/data/temp2e.txt >> analysis/data/temp3.txt
	# awk -v OFS="\t" '{if ($5=="TE_intron" || $5=="TE_exon") print $1,$2,$3,$4,"TE_gene"; else print $1,$2,$3,$4,$5}' analysis/data/temp3.txt > analysis/data/temp4.txt
	# sort -k1,1 -k2,2n analysis/data/temp4.txt | uniq | awk -v OFS="\t" -v s=${sample} '{print $4,$5}' | sort -k1,1 > analysis/data/annotated_peaks_${sample}.txt
	# awk -v OFS="\t" '{print $2}' analysis/data/annotated_peaks_${sample}.txt | sort | uniq -c | awk -v OFS="\t" -v s=${sample} '{print s,$2,$1}' >> analysis/data/annotated_peaks_stats_merged.txt
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)

	# tab2<-read.delim("analysis/data/annotated_peaks_stats_merged.txt", header=TRUE) %>%
		# group_by(Sample) %>%
		# mutate(Total=sum(Count)) %>%
		# group_by(Sample, Annotations)

	# tab2$Annotations<-factor(tab2$Annotations, levels = rev(c("TE","TE_prom","TE_gene","TE_TTS","Promoter", "Exon", 
                                                                      # "Intron", "TTS", "Intergenic")))
	# tab2$Sample<-factor(tab2$Sample, levels = c("random","MBD1","MBD2","MBD4","MBD5","MBD6","SUVH1","SUVH3"))

  # plot2<-ggplot(tab2, aes(Sample, Count, fill=Annotations)) +
    # geom_bar(stat="identity", position="fill", colour="black", show.legend = T) +
    # theme_bw() + guides(colour = FALSE) +
    # labs(title = paste("Distribution of peak summits by annotation"), 
         # y="Distribution of peaks per annotation", x="", fill="Annotation") +
    # theme(axis.text.x = element_text(color="black", size=8, angle=90, vjust=0.5), 
          # panel.grid = element_blank(), axis.ticks.x=element_blank()) + 
    # annotate("text", x=tab2$Sample, y=1.05, label=tab2$Total, size = 5)
  
	# pdf(paste0("analysis/plots/Random_peaks_annotations.pdf"),8,8)
	# print(plot2)
	# dev.off()

# EOF


############################################################################################################################################################
#### To get distance of peaks to centromeres (as a percentage of chromosome arm length)
############################################################################################################################################################

### For individual reps

# printf "Chr1\t14511722\t14803970\nChr2\t3611839\t3633423\nChr3\t13589757\t13867121\nChr4\t3133664\t3133674\nChr5\t11194538\t11723210\n" > analysis/data/centromere_positions.bed

# printf "Sample\tPeak_ID\tDistance\n" > analysis/data/distance_to_centromeres_merged.txt
# for chr in Chr1 Chr2 Chr3 Chr4 Chr5
# do
	# cs=$(awk -v c=$chr '$1==c {print $2}' analysis/data/centromere_positions.bed)
	# ce=$(awk -v c=$chr '$1==c {print $3}' analysis/data/centromere_positions.bed)
	# max=$(awk -v c=$chr '$1==c {print $2}' analysis/data/TAIR10_chrom_sizes.txt)
	# cat analysis/data/random_peaks.bed | awk -v OFS="\t" -v c=$chr -v a=$cs -v b=$ce -v m=$max -v s="random" '$1==c {if ($3<a) d=((a-$3)/(a-1))*100; else if ($2>b) d=(($2-b)/(m-b))*100; else d=0; print s,s"_peaks_"NR,d}' >> analysis/data/distance_to_centromeres_merged.txt
# done


# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)

	# tab3<-read.delim("analysis/data/distance_to_centromeres_merged.txt", stringsAsFactors = FALSE, header = TRUE)
	# plot3<-ggplot(tab3, aes(Distance)) +
		# geom_histogram(aes(y=..density..), binwidth = 2) +
		# theme(panel.grid.minor = element_blank(), 
			# panel.grid.major = element_blank(),
			# axis.title.x = element_blank(),
			# axis.ticks.x = element_blank(),
			# axis.text.x = element_text(size=15),
			# axis.text.y = element_text(size=20),
			# axis.title.y = element_text(size=35),
			# panel.background = element_rect(fill = 'white', colour = 'black'),
			# strip.background = element_rect(fill = 'white', colour = 'black'),
			# strip.text = element_text(size = 35)) +
		# scale_x_continuous(breaks = c(15,55,90), labels = c("Centromere",expression(""%->%""),"Telomere")) +
		# labs(fill="", y="Peak density")

	# pdf(paste0("analysis/plots/Random_peaks_distributionn.pdf"),8,8)
	# print(plot3)
	# dev.off()

# EOF

############################################################################################################################################################
#### To get average methylation at each peak for merged samples
############################################################################################################################################################

# file="/grid/martienssen/home/jcahn/nlsas/projects/mC_DDM1/methylcall/WT_A_Rep1.deduplicated.CX_report.txt.gz"

# printf "Sample\tPeak_ID\tContext\tmC\tCoverage\tCounts\n" > analysis/data/random_peaks_mC.txt
# for context in CG CHG CHH
# do
	# zcat ${file} | awk -v OFS="\t" -v c=${context} '$6==c {print $1,$2,$2+1,$6,".",$3,$4,$5}' | bedtools intersect -wao -a analysis/data/random_peaks.bed -b - | bedtools merge -i stdin -o distinct,distinct,sum,sum,sum -c 4,8,11,12,13 | awk -v OFS="\t" '{t=$6+$7; if (t>0 && $8>0) print "Random",$4,$5,$6/t*100,t/$8,$8}' >> analysis/data/random_peaks_mC.txt
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)
	# library(wesanderson)

	# tab4<-read.delim("analysis/data/random_peaks_mC.txt", stringsAsFactors = FALSE, header = TRUE)
  
	# plot4<-ggplot(tab4, aes(Sample, mC)) + 
				# geom_boxplot(aes(fill=Context), outlier.alpha = 0, alpha=0.5, show.legend = FALSE) +
				# theme(axis.text.x = element_text(color='black', size=20, angle=90, vjust=0.6),
				# axis.text.y = element_text(color='black', size=10),
				# axis.title.y = element_text(color='black', size=15),
				# panel.grid = element_blank(),
				# panel.background = element_rect(fill = 'white', colour = 'black'),
				# strip.background = element_rect(fill = 'white', colour = 'black'), 
				# axis.ticks.x=element_blank(), axis.title.x=element_blank(), 
				# legend.key=element_blank(), strip.text = element_text(size = 20)) + 
				# labs(title = "Average methylation levels at peaks", 
					# y="Methylation level (%)", size=10) +
				# scale_fill_manual(values=wes_palette(n=3, name="Moonrise2"))

	# pdf(paste0("analysis/plots/Random_peaks_mc.pdf"),8,8)
	# print(plot4)
	# dev.off()

# EOF



############################################################################################################################
### To look at mC reader peaks across the 20 chromatin states from ChromHMM

# awk -v OFS="\t" '{print "Chr"$0}' analysis/data/Seedlings_20_segments.bed | sed 's/E/State_/' > analysis/data/ChromHMM_chromatin_states.txt

# awk -v OFS="\t" '{print $4,$3-$2+1}' analysis/data/ChromHMM_chromatin_states.txt | sort -k1,1 | awk -v OFS="\t" -v s="Genome" 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' > analysis/data/Readers_in_chromatin_states_chromHMM.txt
# for lab in Jaco Lister
# do
	# for mbd in MBD1 MBD2 MBD4 MBD5 MBD6 SUVH1 SUVH3
	# do
		# sample="${mbd}_${lab}"
		# file="TF/peaks/best_peaks_Col0_${sample}.bed"
		# if [ -e ${file} ]; then
			 # bedtools intersect -wo -a TF/peaks/best_peaks_Col0_${sample}.bed -b analysis/data/ChromHMM_chromatin_states.txt | awk -v OFS="\t" '{print $14,$15}' | sort -k1,1 | awk -v OFS="\t" -v s=${sample} 'BEGIN {i="State_1"; a=0} {if ($1==i) a+=$2; else { print s,i,a; i=$1; a=$2;}} END {print s,i,a}' >> analysis/data/Readers_in_chromatin_states_chromHMM.txt
		# fi
	# done
# done

# Rscript --vanilla - <<-'EOF'
	# #!/usr/bin/env Rscript

	# library(readr)
	# library(ggplot2)
	# library(dplyr)
	# library(tidyr)
	# library(RColorBrewer)

	# tab<-read.delim("analysis/data/Readers_in_chromatin_states_chromHMM.txt", header=FALSE, 
                # col.names=c("Reader","State","Coverage"))
	# tab$State<-factor(tab$State, levels = paste("State", 1:20, sep = "_"))
	
	# plot<-ggplot(tab, aes(Reader, Coverage, fill=State)) +
		# geom_bar(stat = "identity", position = "fill") +
		# scale_fill_manual(values=c("State_1"="#9a76e9","State_2"="#96d6ae","State_3"="#184c2b","State_4"="#93c7a6",
		# "State_5"="#24e36a","State_6"="#64786b","State_7"="#135029","State_8"="#3b8b58","State_9"="#bcedce",
		# "State_10"="#31bd65","State_11"="#84c9b0","State_12"="#a36d26","State_13"="#5066c6","State_14"="#243276",
		# "State_15"="#7b90f0","State_16"="#02033b","State_17"="#478de4","State_18"="#e79426","State_19"="grey80",
		# "State_20"="#e4a91f")) +
		# theme_bw() +
		# theme(axis.text.x = element_text(size=10, angle=45, vjust = 1, hjust = 1),
			# axis.title.x = element_blank())
		
	# pdf(paste0("analysis/plots/Readers_in_chromatin_states_chromHMM.pdf"),8,8)
	# print(plot)
	# dev.off()
	
	# tab2<-filter(tab, !grepl("Jaco", Reader))
	# plot2<-ggplot(tab2, aes(Reader, Coverage, fill=State)) +
		# geom_bar(stat = "identity", position = "fill") +
		# scale_fill_manual(values=c("State_1"="#9a76e9","State_2"="#96d6ae","State_3"="#184c2b","State_4"="#93c7a6",
		# "State_5"="#24e36a","State_6"="#64786b","State_7"="#135029","State_8"="#3b8b58","State_9"="#bcedce",
		# "State_10"="#31bd65","State_11"="#84c9b0","State_12"="#a36d26","State_13"="#5066c6","State_14"="#243276",
		# "State_15"="#7b90f0","State_16"="#02033b","State_17"="#478de4","State_18"="#e79426","State_19"="grey80",
		# "State_20"="#e4a91f")) +
		# theme_bw() +
		# theme(axis.text.x = element_text(size=10, angle=45, vjust = 1, hjust = 1),
			# axis.title.x = element_blank())
		
	# pdf(paste0("analysis/plots/Readers_in_chromatin_states_chromHMM_v2.pdf"),8,8)
	# print(plot2)
	# dev.off()

# EOF

#######################################
# Summarise a chromatin-state segmentation: for State_1..State_N, count the
# segments carrying that label and report the share of the genome they cover.
# The segment file is read in a single pass.
# Arguments:
#   $1 - tab/space-delimited segment file (col 2 = start, col 3 = end,
#        col 4 = state label such as "State_7")
#   $2 - genome size in bp, used as the denominator of the percentage
#   $3 - number of states to report (optional, default 20)
# Outputs:
#   Tab-separated table on stdout: a "State  Loci  Percentage" header followed
#   by one row per state; states absent from the file are reported as 0 / 0.
# Returns:
#   0 on success, 1 if the segment file is not readable, otherwise awk's status.
#######################################
summarize_chromhmm_states() {
	local segments=$1
	local genome_size=$2
	local n_states=${3:-20}

	# Fail loudly instead of emitting an all-zero table for a missing input.
	if [[ ! -r "${segments}" ]]; then
		printf 'summarize_chromhmm_states: cannot read %s\n' "${segments}" >&2
		return 1
	fi

	# Segment length is end - start; awk's print applies its default number
	# formatting (OFMT) to the percentage.
	awk -v OFS="\t" -v t="${genome_size}" -v k="${n_states}" '
		BEGIN { print "State", "Loci", "Percentage" }
		{ loci[$4]++; bp[$4] += $3 - $2 }
		END {
			for (i = 1; i <= k; i++) {
				s = "State_" i
				print s, loci[s] + 0, bp[s] / t * 100
			}
		}
	' "${segments}"
}

# Denominator for the percentages below.
# NOTE(review): presumably the total genome length in bp -- confirm the source of this value.
t=119146000

summarize_chromhmm_states analysis/data/ChromHMM_chromatin_states.txt "${t}" 20 > analysis/data/summary_chromHMM_states.txt

cat analysis/data/summary_chromHMM_states.txt

############################################################################################################################
printf "\nScript finished\n"

############################################################################################################################
