#!/usr/bin/bash

# Pool the eight single-cell BAM files (SC1-SC8) of each library into one BAM.
# For `samtools merge`, the first file name is the output and the rest are inputs.
module load samtools

# Med1 antibody library.
samtools merge b025_Unique_reads_SC1-8_Med1.bam \
  a073_Unique_reads_SC1_Med1.bam \
  a074_Unique_reads_SC2_Med1.bam \
  a075_Unique_reads_SC3_Med1.bam \
  a076_Unique_reads_SC4_Med1.bam \
  a077_Unique_reads_SC5_Med1.bam \
  a078_Unique_reads_SC6_Med1.bam \
  a079_Unique_reads_SC7_Med1.bam \
  a080_Unique_reads_SC8_Med1.bam

# IgG control library paired with Med1.
samtools merge b027_Unique_reads_SC1-8_IgG_for_Med1.bam \
  a089_Unique_reads_SC1_IgG_for_Med1.bam \
  a090_Unique_reads_SC2_IgG_for_Med1.bam \
  a091_Unique_reads_SC3_IgG_for_Med1.bam \
  a092_Unique_reads_SC4_IgG_for_Med1.bam \
  a093_Unique_reads_SC5_IgG_for_Med1.bam \
  a094_Unique_reads_SC6_IgG_for_Med1.bam \
  a095_Unique_reads_SC7_IgG_for_Med1.bam \
  a096_Unique_reads_SC8_IgG_for_Med1.bam

# 5hmC antibody library.
samtools merge b028_Unique_reads_SC1-8_5hmC.bam \
  a097_Unique_reads_SC1_5hmC.bam \
  a098_Unique_reads_SC2_5hmC.bam \
  a099_Unique_reads_SC3_5hmC.bam \
  a100_Unique_reads_SC4_5hmC.bam \
  a101_Unique_reads_SC5_5hmC.bam \
  a102_Unique_reads_SC6_5hmC.bam \
  a103_Unique_reads_SC7_5hmC.bam \
  a104_Unique_reads_SC8_5hmC.bam

# IgG control library paired with 5hmC.
samtools merge b029_Unique_reads_SC1-8_IgG_for_5hmC.bam \
  a105_Unique_reads_SC1_IgG_for_5hmC.bam \
  a106_Unique_reads_SC2_IgG_for_5hmC.bam \
  a107_Unique_reads_SC3_IgG_for_5hmC.bam \
  a108_Unique_reads_SC4_IgG_for_5hmC.bam \
  a109_Unique_reads_SC5_IgG_for_5hmC.bam \
  a110_Unique_reads_SC6_IgG_for_5hmC.bam \
  a111_Unique_reads_SC7_IgG_for_5hmC.bam \
  a112_Unique_reads_SC8_IgG_for_5hmC.bam

# Convert the combined BAM files to BED files.
# Each merged file b0NN_*.bam is written out as b1NN_*.bed.
module load bedtools
for merged in \
  25_Unique_reads_SC1-8_Med1 \
  27_Unique_reads_SC1-8_IgG_for_Med1 \
  28_Unique_reads_SC1-8_5hmC \
  29_Unique_reads_SC1-8_IgG_for_5hmC; do
  bedtools bamtobed -i "b0${merged}.bam" >"b1${merged}.bed"
done

# Generate randomized controls: relocate the reads of every library with
# `bedtools shuffle`, using GRCh38.genome as the genome file.
# Each entry is "<two-digit file ID>:<library suffix>"; b1NN is read, b2NN written.
for track in \
  25:SC1-8_Med1 \
  27:SC1-8_IgG_for_Med1 \
  28:SC1-8_5hmC \
  29:SC1-8_IgG_for_5hmC; do
  track_id=${track%%:*}
  track_lib=${track#*:}
  bedtools shuffle -i "b1${track_id}_Unique_reads_${track_lib}.bed" -g GRCh38.genome \
    >"b2${track_id}_Randomized_reads_${track_lib}.bed"
done

# Generate genome bins: 500 bp windows whose start advances by 250 bp.
bin_width=500
bin_step=250
bedtools makewindows -g GRCh38.genome -w "$bin_width" -s "$bin_step" \
  >b300_Genome_bin_500bp_250bp_sliding_GRCh38.bed

# Count reads in the bins.
# `intersect -c` appends one overlap-count column per pass, so each library is
# processed as a chain of four passes: antibody, IgG, randomized antibody,
# randomized IgG. Stage N is the -a input of pass N and stage N+1 its output.

# Med1 chain.
med1_tracks=(
  b125_Unique_reads_SC1-8_Med1.bed
  b127_Unique_reads_SC1-8_IgG_for_Med1.bed
  b225_Randomized_reads_SC1-8_Med1.bed
  b227_Randomized_reads_SC1-8_IgG_for_Med1.bed
)
med1_stages=(
  b300_Genome_bin_500bp_250bp_sliding_GRCh38.bed
  b600.bed
  b601.bed
  b602.bed
  b603_Med1_IgG_counts_in_bins.bed
)
for pass in 0 1 2 3; do
  bedtools intersect -a "${med1_stages[pass]}" -b "${med1_tracks[pass]}" -c \
    >"${med1_stages[pass + 1]}"
done

# 5hmC chain.
hmc_tracks=(
  b128_Unique_reads_SC1-8_5hmC.bed
  b129_Unique_reads_SC1-8_IgG_for_5hmC.bed
  b228_Randomized_reads_SC1-8_5hmC.bed
  b229_Randomized_reads_SC1-8_IgG_for_5hmC.bed
)
hmc_stages=(
  b300_Genome_bin_500bp_250bp_sliding_GRCh38.bed
  b800.bed
  b801.bed
  b802.bed
  b803_5hmC_IgG_counts_in_bins.bed
)
for pass in 0 1 2 3; do
  bedtools intersect -a "${hmc_stages[pass]}" -b "${hmc_tracks[pass]}" -c \
    >"${hmc_stages[pass + 1]}"
done

# Select rows (bins) containing Ab reads, i.e. a non-zero count in column 4.
awk '$4 > 0' b603_Med1_IgG_counts_in_bins.bed >c017_Med1_IgG_counts_in_bins.bed
awk '$4 > 0' b803_5hmC_IgG_counts_in_bins.bed >c019_5hmC_IgG_counts_in_bins.bed

# Calculate Ab-IgG, Ab-Random IgG, Random Ab-IgG, Random Ab-Random IgG.
# Input columns 4-7 are Ab, IgG, Random Ab and Random IgG counts; the four
# differences are appended as columns 8-11 of a tab-separated row.
delta_program='
BEGIN { OFS = "\t" }
{
  ab = $4; igg = $5; rand_ab = $6; rand_igg = $7
  print $1, $2, $3, ab, igg, rand_ab, rand_igg,
        ab - igg, ab - rand_igg, rand_ab - igg, rand_ab - rand_igg
}'
awk "$delta_program" c017_Med1_IgG_counts_in_bins.bed >c066_Med1_Delta_IgG.bed
awk "$delta_program" c019_5hmC_IgG_counts_in_bins.bed >c068_5hmC_Delta_IgG.bed

# Add header to the BED files for the following bootstrap test.

#######################################
# Write the 11-column header row followed by the contents of a delta file.
# The header is emitted with printf rather than `sed 1s/^/.../`, so it is
# written even when the input has no rows and does not rely on sed
# interpreting \t and \n in the replacement text.
# Arguments: $1 - headerless tab-separated delta file (input)
#            $2 - file to create: header row + input rows
# Returns:   0 on success; 1 (nothing written) if the input is unreadable
#######################################
add_bootstrap_header() {
  local rows_file=$1 headed_file=$2
  if [[ ! -r "$rows_file" ]]; then
    printf 'add_bootstrap_header: cannot read %s\n' "$rows_file" >&2
    return 1
  fi
  {
    printf 'Chr\tStart\tEnd\tAb\tIgG\tRandAb\tRandIgG\tAbDIgG\tAbDRandIgG\tRandAbDIgG\tRandAbDRandIgG\n'
    cat -- "$rows_file"
  } >"$headed_file"
}

add_bootstrap_header c066_Med1_Delta_IgG.bed c116_Med1_Delta_IgG_header.bed
add_bootstrap_header c068_5hmC_Delta_IgG.bed c118_5hmC_Delta_IgG_header.bed

# Make directories for bootstrap test.
# -p keeps a rerun from failing when the directories already exist.
mkdir -p c116_Med1_Delta_IgG_SC1-8 c118_5hmC_Delta_IgG_SC1-8

# Copy each headed BED file into its bootstrap directory under the unified
# file name Input.bed.
for lib in c116_Med1 c118_5hmC; do
  cp "${lib}_Delta_IgG_header.bed" "${lib}_Delta_IgG_SC1-8/Input.bed"
done

# Copy the bootstrap-test R script into both bootstrap directories.
for boot_dir in c116_Med1_Delta_IgG_SC1-8 c118_5hmC_Delta_IgG_SC1-8; do
  cp c200_Rscript_boot.r "${boot_dir}/c200_Rscript_boot.r"
done

# Adjust bin number for bootstrap test: keep the first 200,000 lines of each
# Input.bed. Input.bed begins with the header row, so this is the header plus
# 199,999 bins.
# NOTE(review): confirm whether 200,000 data rows (head -n 200001) was intended.
#
# Each cd runs inside a subshell so the script's working directory is left
# untouched; the steps that follow use paths relative to the original
# directory. `&&` prevents head from writing a stray Input_200000L.bed into the
# current directory when the cd fails.
for boot_dir in c116_Med1_Delta_IgG_SC1-8 c118_5hmC_Delta_IgG_SC1-8; do
  (
    cd "/data/ohnukih/Nature_Methods/c001_Bootstrap/${boot_dir}" \
      && head -n 200000 Input.bed >Input_200000L.bed
  )
done

# Run swarm command for executing the R script in each directory.
# The job list is read from c300_Swarm_Rscript_boot.swarm in the current
# directory; all other options are passed to swarm as listed.
swarm_options=(
  -f c300_Swarm_Rscript_boot.swarm
  --module R/3.5
  --time 24:00:00
  --partition=ccr,norm
  -g 240
  -t 50
)
swarm "${swarm_options[@]}"

# C001. Step14. Extract putative signal regions using upper side of 99.9% confidence interval determined by bootstrap test.
# NOTE(review): the output names say CI0.99 while this step says 99.9% - confirm
# which is meant. The cut-offs (1 for Med1, 5 for 5hmC) are hard-coded here,
# presumably taken from the bootstrap results - verify.

#######################################
# Keep the bins whose AbDIgG value (column 8) exceeds a cut-off.
# The first line of the input is the header row; it is skipped explicitly.
# Without that, awk compares the text "AbDIgG" with the cut-off as strings,
# the header passes the filter, and it is later counted as a region.
# Arguments: $1 - headed, tab-separated delta file
#            $2 - cut-off; rows with column 8 strictly greater are kept
#            $3 - output file (no header)
#######################################
extract_signal_regions() {
  local headed_file=$1 min_delta=$2 regions_file=$3
  awk -v min_delta="$min_delta" 'FNR > 1 && $8 + 0 > min_delta + 0' \
    "$headed_file" >"$regions_file"
}

extract_signal_regions c116_Med1_Delta_IgG_header.bed 1 \
  C416_Med1_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed
extract_signal_regions c118_5hmC_Delta_IgG_header.bed 5 \
  C418_5hmC_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed

# C001. Step15. Count region number of putative signals extracted in previous step.
# One `wc -l` line (line count, then path) is recorded per matching file found
# under the current directory.
find -name 'C4*_PutativeSignalRegions_CI*.bed' \
  -exec wc -l {} \; \
  >C500_Summary_region_number_of_putative_signals.csv

# C001. Step16. Count read number of putative signals.
# Adds up column 8 (AbDIgG) over all rows and writes the single total.
sum_column8='{ total += $8 } END { print total }'
awk "$sum_column8" C416_Med1_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed \
  >C616_Med1_Delta_IgG_SC1-8_Putative_Signal_Reads_CI0.999.bed
awk "$sum_column8" C418_5hmC_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed \
  >C618_5hmC_Delta_IgG_SC1-8_Putative_Signal_Reads_CI0.999.bed

# Summarize the read number of putative signals computed in the previous step.

#######################################
# Print "<read total> <path>" for every C6*_Putative_Signal_Reads_CI*.bed file
# found under the current directory.
# Each of those files holds a single line containing a column-8 total, so
# `wc -l` would always report 1; the stored total is reported instead, in the
# same "<number> <path>" layout that `wc -l` uses.
# Outputs: one line per file on stdout
#######################################
summarize_signal_read_totals() {
  find . -name 'C6*_Putative_Signal_Reads_CI*.bed' \
    -exec awk '{ print $1 + 0, FILENAME }' {} \;
}

summarize_signal_read_totals >C699_Summary_read_number_of_putative_signals.csv

# C001_2. Putative signal reads in each single cell.

# Med1: count the reads of every single-cell BAM inside the putative signal
# regions. Each pass appends one count column, in the order listed below
# (Med1 SC1-SC8, then IgG SC1-SC8). Intermediate results go to C700.bed ...
# C714.bed and the final table to the C715 file.
med1_cell_bams=(
  a073_Unique_reads_SC1_Med1.bam
  a074_Unique_reads_SC2_Med1.bam
  a075_Unique_reads_SC3_Med1.bam
  a076_Unique_reads_SC4_Med1.bam
  a077_Unique_reads_SC5_Med1.bam
  a078_Unique_reads_SC6_Med1.bam
  a079_Unique_reads_SC7_Med1.bam
  a080_Unique_reads_SC8_Med1.bam
  a089_Unique_reads_SC1_IgG_for_Med1.bam
  a090_Unique_reads_SC2_IgG_for_Med1.bam
  a091_Unique_reads_SC3_IgG_for_Med1.bam
  a092_Unique_reads_SC4_IgG_for_Med1.bam
  a093_Unique_reads_SC5_IgG_for_Med1.bam
  a094_Unique_reads_SC6_IgG_for_Med1.bam
  a095_Unique_reads_SC7_IgG_for_Med1.bam
  a096_Unique_reads_SC8_IgG_for_Med1.bam
)
med1_current=C416_Med1_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed
for med1_pass in "${!med1_cell_bams[@]}"; do
  if ((med1_pass == ${#med1_cell_bams[@]} - 1)); then
    med1_next=C715_Med1_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed
  else
    med1_next="C$((700 + med1_pass)).bed"
  fi
  bedtools intersect -a "$med1_current" -b "${med1_cell_bams[med1_pass]}" -c >"$med1_next"
  med1_current=$med1_next
done

# 5hmC: count the reads of every single-cell BAM inside the putative signal
# regions. Each pass appends one count column, in the order listed below
# (5hmC SC1-SC8, then IgG SC1-SC8). Intermediate results go to C764.bed ...
# C778.bed and the final table to the C779 file.
hmc_cell_bams=(
  a097_Unique_reads_SC1_5hmC.bam
  a098_Unique_reads_SC2_5hmC.bam
  a099_Unique_reads_SC3_5hmC.bam
  a100_Unique_reads_SC4_5hmC.bam
  a101_Unique_reads_SC5_5hmC.bam
  a102_Unique_reads_SC6_5hmC.bam
  a103_Unique_reads_SC7_5hmC.bam
  a104_Unique_reads_SC8_5hmC.bam
  a105_Unique_reads_SC1_IgG_for_5hmC.bam
  a106_Unique_reads_SC2_IgG_for_5hmC.bam
  a107_Unique_reads_SC3_IgG_for_5hmC.bam
  a108_Unique_reads_SC4_IgG_for_5hmC.bam
  a109_Unique_reads_SC5_IgG_for_5hmC.bam
  a110_Unique_reads_SC6_IgG_for_5hmC.bam
  a111_Unique_reads_SC7_IgG_for_5hmC.bam
  a112_Unique_reads_SC8_IgG_for_5hmC.bam
)
hmc_current=C418_5hmC_Delta_IgG_SC1-8_PutativeSignalRegions_CI0.99.bed
for hmc_pass in "${!hmc_cell_bams[@]}"; do
  if ((hmc_pass == ${#hmc_cell_bams[@]} - 1)); then
    hmc_next=C779_5hmC_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed
  else
    hmc_next="C$((764 + hmc_pass)).bed"
  fi
  bedtools intersect -a "$hmc_current" -b "${hmc_cell_bams[hmc_pass]}" -c >"$hmc_next"
  hmc_current=$hmc_next
done

# Calculate AbDIgG for each single cell.
# Columns 1-11 are copied through. Columns 12-19 (antibody counts of SC1-SC8)
# each have the matching IgG count, eight columns further right (20-27),
# subtracted, giving eight per-cell differences as output columns 12-19.
per_cell_delta='
BEGIN { OFS = "\t" }
{
  row = $1
  for (col = 2; col <= 11; col++)
    row = row OFS $col
  for (cell = 12; cell <= 19; cell++)
    row = row OFS ($cell - $(cell + 8))
  print row
}'
awk "$per_cell_delta" C715_Med1_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed \
  >C800_Med1_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed
awk "$per_cell_delta" C779_5hmC_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed \
  >C804_5hmC_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed

# Calculate total AbDIgG in the putative signal regions of Med1 and 5hmC.
# Columns 12-19 (one per single cell) are summed over all rows and the eight
# totals are written on one space-separated line.
column_totals='
{ for (col = 12; col <= 19; col++) total[col] += $col }
END { print total[12], total[13], total[14], total[15], total[16], total[17], total[18], total[19] }'
awk "$column_totals" C800_Med1_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed \
  >C900_Med1_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.txt
awk "$column_totals" C804_5hmC_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed \
  >C904_5hmC_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.txt

# Extract putative signal regions of each cell: the rows whose AbDIgG for that
# cell is positive. Cell N is read from column 11 + N of the library table.
# Each entry: library label, ID of its source table, offset of its D-file IDs
# (Med1 -> D001-D008, 5hmC -> D017-D024).
for lib_spec in 'Med1 C800 0' '5hmC C804 16'; do
  read -r lib_label table_id id_offset <<<"$lib_spec"
  for cell in 1 2 3 4 5 6 7 8; do
    printf -v cell_regions 'D%03d_%s_putative_signal_regions_SC%d.bed' \
      "$((id_offset + cell))" "$lib_label" "$cell"
    awk -v col="$((cell + 11))" '$col > 0' \
      "${table_id}_${lib_label}_Delta_IgG_SC1-8_PutativeSignalReads_CI0.99.bed" \
      >"$cell_regions"
  done
done

# Summarize number of signal regions in each cell.
# One `wc -l` line (line count, then path) is recorded per matching file found
# under the current directory.
find -name 'D0*_putative_signal_regions_SC*.bed' \
  -exec wc -l {} \; \
  >D050_Summary_region_number_of_putative_signals.csv
