#!/bin/bash

#https://hicexplorer.readthedocs.io/en/latest/content/mES-HiC_analysis.html#build-visualize-and-correct-hi-c-matrix

# Create the project directory under the home directory and move into it.
# The HiCExplorer steps further down use paths relative to this directory
# (fastq/, mapped_files/, hiCmatrix/, plots/, ...).
cd ~ || exit 1
mkdir -p BG3_Kc167_HiC

# Stop here if the project directory cannot be entered; otherwise the output
# folders would be created in, and later steps run from, the wrong location.
cd BG3_Kc167_HiC/ || exit 1

# Output folders used by the rest of the pipeline.
mkdir -p mapped_files fastq hiCmatrix plots TADs compartments

################################################################################
#  Download files. Check that the correct identifiers have been used.
################################################################################
# Fetch the four datasets into fastq/ with --split-files.
# Accession order: Kc167 rep1, Kc167 rep2, BG3 rep1, BG3 rep2.
for accession in SRX5014527 SRX5014528 SRX5014529 SRX5014530; do
  fastq-dump --split-files --outdir fastq "${accession}"
done


################################################################################
#  Subsample the Kc167 files
################################################################################
# Run subsample.py on both mates of each Kc167 replicate.
# Arguments, in order: a number (presumably the fraction of reads to keep --
# confirm against subsample.py), the two input FASTQ files, the two outputs.
# rep1 (SRX5014527) uses 0.82, rep2 (SRX5014528) uses 0.65.
for spec in 0.82:SRX5014527 0.65:SRX5014528; do
  fraction="${spec%%:*}"
  run="${spec#*:}"
  python subsample.py "${fraction}" \
    "fastq/${run}_1.fastq" "fastq/${run}_2.fastq" \
    "fastq/${run}_sub_1.fastq" "fastq/${run}_sub_2.fastq"
done


################################################################################
#  align the reads
################################################################################

#download the genome and prepare it (one-off step, left commented out)
#bwa index -a bwtsw ~/genomes/DM6/dm6.fa

# Align every mate file on its own (single-end) and store the result under
# mapped_files/. Run order: Kc167 rep1, Kc167 rep2, BG3 rep1, BG3 rep2,
# Kc167 subset rep1, Kc167 subset rep2; mate 1 before mate 2 for each.
# NOTE: samtools view is called with -b, so the output is BAM even though the
# files carry a .sam extension; the names are kept because the
# hicBuildMatrix calls later in this script refer to them.
genome_fasta=~/genomes/DM6/dm6.fa
for run in SRX5014527 SRX5014528 SRX5014529 SRX5014530 \
  SRX5014527_sub SRX5014528_sub; do
  for mate in 1 2; do
    bwa mem -t 20 -A1 -B4 -E50 -L0 "${genome_fasta}" "fastq/${run}_${mate}.fastq" \
      | samtools view -Shb - > "mapped_files/${run}_${mate}.sam"
  done
done




################################################################################
# Build Matrices  Kc167 DPNII for TADs
################################################################################

# Kc167 (Keerthi rep1 = SRX5014527, rep2 = SRX5014528), built from the DpnII
# restriction cut file with a 150 bp minimum distance.

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527 rep2:SRX5014528; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --restrictionCutFile dm6_DPNII_positions.bed \
    --minDistance 150 \
    --threads 30 \
    --outBam "Kc167_${rep}_hic_dpnII.bam" \
    -o "hiCmatrix/Kc167_${rep}_hic_matrix_dpnII.npz" \
    --QCfolder "hiCmatrix/Kc167_${rep}_hic_matrix_dpnII_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_rep1_hic_matrix_dpnII.npz hiCmatrix/Kc167_rep2_hic_matrix_dpnII.npz \
  -o hiCmatrix/Kc167_merged_hic_matrix_dpnII.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_dpnII.npz" \
    -o "plots/Kc167_${kind}_hic_matrix_dpnII_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_dpnII.npz" \
    -o "hiCmatrix/Kc167_${kind}_hic_matrix_dpnII_corrected.npz"
done


################################################################################
# Build Matrices  Kc167 100Kb for plots
################################################################################
# Kc167 (Keerthi rep1 = SRX5014527, rep2 = SRX5014528) at a bin size of
# 100000 bp (100Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527 rep2:SRX5014528; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 100000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "Kc167_${rep}_hic_100Kb.bam" \
    -o "hiCmatrix/Kc167_${rep}_hic_matrix_100Kb.npz" \
    --QCfolder "hiCmatrix/Kc167_${rep}_hic_matrix_100Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_rep1_hic_matrix_100Kb.npz hiCmatrix/Kc167_rep2_hic_matrix_100Kb.npz \
  -o hiCmatrix/Kc167_merged_hic_matrix_100Kb.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_100Kb.npz" \
    -o "plots/Kc167_${kind}_hic_matrix_100Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -3.0 5 \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_100Kb.npz" \
    -o "hiCmatrix/Kc167_${kind}_hic_matrix_100Kb_corrected.npz"
done


################################################################################
# Build Matrices  Kc167 10Kb for compartments
################################################################################

# Kc167 (Keerthi rep1 = SRX5014527, rep2 = SRX5014528) at a bin size of
# 10000 bp (10Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527 rep2:SRX5014528; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 10000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "Kc167_${rep}_hic_10Kb.bam" \
    -o "hiCmatrix/Kc167_${rep}_hic_matrix_10Kb.npz" \
    --QCfolder "hiCmatrix/Kc167_${rep}_hic_matrix_10Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_rep1_hic_matrix_10Kb.npz hiCmatrix/Kc167_rep2_hic_matrix_10Kb.npz \
  -o hiCmatrix/Kc167_merged_hic_matrix_10Kb.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_10Kb.npz" \
    -o "plots/Kc167_${kind}_hic_matrix_10Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/Kc167_${kind}_hic_matrix_10Kb.npz" \
    -o "hiCmatrix/Kc167_${kind}_hic_matrix_10Kb_corrected.npz"
done


################################################################################
# Build Matrices BG3 DPNII for TADs
################################################################################

# BG3 (Keerthi rep1 = SRX5014529, rep2 = SRX5014530), built from the DpnII
# restriction cut file with a 150 bp minimum distance.

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014529 rep2:SRX5014530; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --restrictionCutFile dm6_DPNII_positions.bed \
    --minDistance 150 \
    --threads 30 \
    --outBam "BG3_${rep}_hic_dpnII.bam" \
    -o "hiCmatrix/BG3_${rep}_hic_matrix_dpnII.npz" \
    --QCfolder "hiCmatrix/BG3_${rep}_hic_matrix_dpnII_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/BG3_rep1_hic_matrix_dpnII.npz hiCmatrix/BG3_rep2_hic_matrix_dpnII.npz \
  -o hiCmatrix/BG3_merged_hic_matrix_dpnII.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_dpnII.npz" \
    -o "plots/BG3_${kind}_hic_matrix_dpnII_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_dpnII.npz" \
    -o "hiCmatrix/BG3_${kind}_hic_matrix_dpnII_corrected.npz"
done


################################################################################
# Build Matrices BG3 100Kb for plots
################################################################################
# BG3 (Keerthi rep1 = SRX5014529, rep2 = SRX5014530) at a bin size of
# 100000 bp (100Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014529 rep2:SRX5014530; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 100000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "BG3_${rep}_hic_100Kb.bam" \
    -o "hiCmatrix/BG3_${rep}_hic_matrix_100Kb.npz" \
    --QCfolder "hiCmatrix/BG3_${rep}_hic_matrix_100Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/BG3_rep1_hic_matrix_100Kb.npz hiCmatrix/BG3_rep2_hic_matrix_100Kb.npz \
  -o hiCmatrix/BG3_merged_hic_matrix_100Kb.npz

# Diagnostic plots: merged first, then each replicate.
# Keep the command on its own line, never appended to a comment: a trailing
# backslash inside a '#' comment does not continue the line, so a command
# fused onto the comment is dropped and its argument line is executed as a
# (non-existent) command named "-m".
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_100Kb.npz" \
    -o "plots/BG3_${kind}_hic_matrix_100Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -2.4 5 \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_100Kb.npz" \
    -o "hiCmatrix/BG3_${kind}_hic_matrix_100Kb_corrected.npz"
done


################################################################################
# Build Matrices BG3 10Kb for compartments
################################################################################

# BG3 (Keerthi rep1 = SRX5014529, rep2 = SRX5014530) at a bin size of
# 10000 bp (10Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014529 rep2:SRX5014530; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 10000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "BG3_${rep}_hic_10Kb.bam" \
    -o "hiCmatrix/BG3_${rep}_hic_matrix_10Kb.npz" \
    --QCfolder "hiCmatrix/BG3_${rep}_hic_matrix_10Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/BG3_rep1_hic_matrix_10Kb.npz hiCmatrix/BG3_rep2_hic_matrix_10Kb.npz \
  -o hiCmatrix/BG3_merged_hic_matrix_10Kb.npz

# Diagnostic plots: merged first, then each replicate.
# Keep the command on its own line, never appended to a comment: a trailing
# backslash inside a '#' comment does not continue the line, so a command
# fused onto the comment is dropped and its argument line is executed as a
# (non-existent) command named "-m".
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_10Kb.npz" \
    -o "plots/BG3_${kind}_hic_matrix_10Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/BG3_${kind}_hic_matrix_10Kb.npz" \
    -o "hiCmatrix/BG3_${kind}_hic_matrix_10Kb_corrected.npz"
done



################################################################################
# Build Matrices  Kc167 DPNII for TADs subsample
################################################################################

# Subsampled Kc167 (Keerthi rep1 = SRX5014527_sub, rep2 = SRX5014528_sub),
# built from the DpnII restriction cut file with a 150 bp minimum distance.

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527_sub rep2:SRX5014528_sub; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --restrictionCutFile dm6_DPNII_positions.bed \
    --minDistance 150 \
    --threads 30 \
    --outBam "Kc167_sub_${rep}_hic_dpnII.bam" \
    -o "hiCmatrix/Kc167_sub_${rep}_hic_matrix_dpnII.npz" \
    --QCfolder "hiCmatrix/Kc167_sub_${rep}_hic_matrix_dpnII_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_sub_rep1_hic_matrix_dpnII.npz hiCmatrix/Kc167_sub_rep2_hic_matrix_dpnII.npz \
  -o hiCmatrix/Kc167_sub_merged_hic_matrix_dpnII.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_dpnII.npz" \
    -o "plots/Kc167_sub_${kind}_hic_matrix_dpnII_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_dpnII.npz" \
    -o "hiCmatrix/Kc167_sub_${kind}_hic_matrix_dpnII_corrected.npz"
done


################################################################################
# Build Matrices  Kc167 100Kb for plots subsample
################################################################################
# Subsampled Kc167 (Keerthi rep1 = SRX5014527_sub, rep2 = SRX5014528_sub) at
# a bin size of 100000 bp (100Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527_sub rep2:SRX5014528_sub; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 100000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "Kc167_sub_${rep}_hic_100Kb.bam" \
    -o "hiCmatrix/Kc167_sub_${rep}_hic_matrix_100Kb.npz" \
    --QCfolder "hiCmatrix/Kc167_sub_${rep}_hic_matrix_100Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_sub_rep1_hic_matrix_100Kb.npz hiCmatrix/Kc167_sub_rep2_hic_matrix_100Kb.npz \
  -o hiCmatrix/Kc167_sub_merged_hic_matrix_100Kb.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_100Kb.npz" \
    -o "plots/Kc167_sub_${kind}_hic_matrix_100Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -3.0 5 \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_100Kb.npz" \
    -o "hiCmatrix/Kc167_sub_${kind}_hic_matrix_100Kb_corrected.npz"
done


################################################################################
# Build Matrices  Kc167 10Kb for compartments subsample
################################################################################

# Subsampled Kc167 (Keerthi rep1 = SRX5014527_sub, rep2 = SRX5014528_sub) at
# a bin size of 10000 bp (10Kb).

# Build one matrix per replicate from its two separately mapped mate files.
for pair in rep1:SRX5014527_sub rep2:SRX5014528_sub; do
  rep="${pair%%:*}"
  run="${pair#*:}"
  hicBuildMatrix --samFiles "mapped_files/${run}_1.sam" "mapped_files/${run}_2.sam" \
    --binSize 10000 \
    --restrictionSequence GATC \
    --threads 30 \
    --outBam "Kc167_sub_${rep}_hic_10Kb.bam" \
    -o "hiCmatrix/Kc167_sub_${rep}_hic_matrix_10Kb.npz" \
    --QCfolder "hiCmatrix/Kc167_sub_${rep}_hic_matrix_10Kb_hicQC"
done

# Merge (sum) the two replicate matrices.
hicSumMatrices \
  -m hiCmatrix/Kc167_sub_rep1_hic_matrix_10Kb.npz hiCmatrix/Kc167_sub_rep2_hic_matrix_10Kb.npz \
  -o hiCmatrix/Kc167_sub_merged_hic_matrix_10Kb.npz

# Diagnostic plots: merged first, then each replicate.
for kind in merged rep1 rep2; do
  hicCorrectMatrix diagnostic_plot \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_10Kb.npz" \
    -o "plots/Kc167_sub_${kind}_hic_matrix_10Kb_diagnostic_plot.png"
done

# Correction of the Hi-C matrices, same order as the plots.
# Thresholds presumably read off the diagnostic plots -- confirm.
for kind in merged rep1 rep2; do
  hicCorrectMatrix correct --filterThreshold -1.4 5 \
    -m "hiCmatrix/Kc167_sub_${kind}_hic_matrix_10Kb.npz" \
    -o "hiCmatrix/Kc167_sub_${kind}_hic_matrix_10Kb_corrected.npz"
done



################################################################################
# Dist Vs Counts all 10Kb
################################################################################
# Counts-vs-distance plot over the corrected 10Kb matrices of BG3 and the
# subsampled Kc167 (rep1, rep2, merged for each). Matrices and labels are
# listed in matching order; note the Kc167 labels do not mention "sub"
# although the Kc167_sub matrices are the ones plotted.
dist_matrices=(
  hiCmatrix/BG3_rep1_hic_matrix_10Kb_corrected.npz
  hiCmatrix/BG3_rep2_hic_matrix_10Kb_corrected.npz
  hiCmatrix/BG3_merged_hic_matrix_10Kb_corrected.npz
  hiCmatrix/Kc167_sub_rep1_hic_matrix_10Kb_corrected.npz
  hiCmatrix/Kc167_sub_rep2_hic_matrix_10Kb_corrected.npz
  hiCmatrix/Kc167_sub_merged_hic_matrix_10Kb_corrected.npz
)
dist_labels=(
  'BG3 rep1' 'BG3 rep2' 'BG3 merged'
  'Kc167 rep1' 'Kc167 rep2' 'Kc167 merged'
)
hicPlotDistVsCounts -m "${dist_matrices[@]}" \
  -o plots/counts_vs_dist_10Kb_all_sub.png \
  --outFileData plots/counts_vs_dist_10Kb_all_sub.dat \
  --labels "${dist_labels[@]}" \
  --maxdepth 20000000 \
  --plotsize 5 4.2


################################################################################
# export for plots matrix at 100Kb
################################################################################
# Export the corrected BG3 100Kb matrix in dekker format, compute the
# Kc167-vs-BG3 log2 ratio matrix, then export that ratio matrix the same way.
chrom_order=(2L 2R 3L 3R 4 X)
bg3_100kb=hiCmatrix/BG3_merged_hic_matrix_100Kb_corrected
ratio_100kb=hiCmatrix/Kc167_BG3_merged_hic_matrix_100Kb_corrected

hicExport --inFile "${bg3_100kb}.npz" --inputFormat npz \
  --outFileName "${bg3_100kb}.mat" --outputFormat dekker \
  --chromosomeOrder "${chrom_order[@]}"

# Kc167 is listed first and BG3 second.
hicCompareMatrices --matrices hiCmatrix/Kc167_merged_hic_matrix_100Kb_corrected.npz \
  "${bg3_100kb}.npz" \
  -o "${ratio_100kb}.npz" --operation log2ratio

hicExport --inFile "${ratio_100kb}.npz" --inputFormat npz \
  --outFileName "${ratio_100kb}.mat" --outputFormat dekker \
  --chromosomeOrder "${chrom_order[@]}"


################################################################################
# call TADs cells
################################################################################
# Call TADs on the corrected merged dpnII matrices of BG3, Kc167 and the
# subsampled Kc167, twice: first with --delta 0.04 (plain output prefix),
# then with --delta 0.08 (output prefix suffixed with "_strong").
for setting in 0.04: 0.08:_strong; do
  delta="${setting%%:*}"
  suffix="${setting#*:}"
  for sample in BG3 Kc167 Kc167_sub; do
    hicFindTADs -m "hiCmatrix/${sample}_merged_hic_matrix_dpnII_corrected.npz" \
      --outPrefix "TADs/${sample}_merged_hic_matrix_dpnII_corrected${suffix}" \
      --correctForMultipleTesting fdr \
      --numberOfProcessors 30 \
      --minBoundaryDistance 5000 --thresholdComparisons 0.01 --delta "${delta}" \
      --step 2000
  done
done


################################################################################
# enriched contacts
################################################################################

# Compute obs/exp enriched-contact matrices for Kc167, BG3 and the subsampled
# Kc167 from their corrected merged dpnII matrices.
for sample in Kc167 BG3 Kc167_sub; do
  hicFindEnrichedContacts --matrix "hiCmatrix/${sample}_merged_hic_matrix_dpnII_corrected.npz" \
    --outFileName "hiCmatrix/${sample}_merged_hic_matrix_dpnII_corrected_enriched.npz.h5" --method obs/exp
done

# Export each enriched matrix in GInteractions format, same sample order.
for sample in Kc167 BG3 Kc167_sub; do
  hicExport --inFile "hiCmatrix/${sample}_merged_hic_matrix_dpnII_corrected_enriched.npz.h5" \
    -o "hiCmatrix/${sample}_merged_hic_matrix_dpnII_corrected_enriched" --outputFormat GInteractions
done

################################################################################
# call loops
################################################################################

#custom genome dm6
# NOTE(review): the files below are generated in ~/genome/, but the Juicer
# calls further down read them from ~/juicer/genome/ (and the bwa alignments
# earlier in this script use ~/genomes/DM6/). Confirm which location is
# intended, or copy/link the generated files accordingly.
cd ~/genome/ || exit 1
bwa index dm6.fa
python /usr/local/juicer/misc/generate_site_positions.py DpnII dm6 dm6.fa
# Write the first and last field of every line of dm6_DpnII.txt, tab
# separated (presumably chromosome name and chromosome length -- confirm).
# The interactive leftovers that followed (two dumps of the same table to the
# terminal and a `less` pager) were dropped so the script can run unattended.
awk 'BEGIN{OFS="\t"}{print $1, $NF}' dm6_DpnII.txt > dm6.chrom.sizes

# One Juicer working directory per sample, directly under the home directory.
cd ~ || exit 1
juicer_samples=(Kc167_HiC Kc167_sub_HiC BG3_HiC)
mkdir -p "${juicer_samples[@]}"

juicer_genome_dir=~/juicer/genome
juicer_tools_jar=~/myJuicerDir/scripts/common/juicer_tools.jar

#call preprocessing pipeline
for sample in "${juicer_samples[@]}"; do
  juicer.sh -t 30 -z "${juicer_genome_dir}/dm6.fa" \
    -p "${juicer_genome_dir}/dm6.chrom.sizes" \
    -y "${juicer_genome_dir}/dm6_DpnII.txt" \
    -d ~/"${sample}"
done

#generate the hic files
for sample in "${juicer_samples[@]}"; do
  java -Xmx10g -jar "${juicer_tools_jar}" pre -q 30 \
    -r 500,1000,2000,5000,10000,20000,25000 \
    ~/"${sample}/aligned/merged_nodups.txt" \
    ~/"${sample}/aligned/${sample}.hic" \
    "${juicer_genome_dir}/dm6.chrom.sizes"
done

#call chromatin loops
# hiccups is run from inside each sample directory because its input and
# output paths are relative; abort if the directory cannot be entered so it
# never runs against another sample's files.
for sample in "${juicer_samples[@]}"; do
  cd ~/"${sample}" || exit 1
  java -Xms512m -Xmx2048m -jar "${juicer_tools_jar}" hiccups \
    -k KR -r 2000 -f 0.05 -p 5 -i 10 -t 0.02,1.5,1.75,2 -d 20000 \
    "aligned/${sample}.hic" "${sample}_hiccups_loops_2kb_05fdr"
done



################################################################################
# call compartments
################################################################################
# Run juicer_tools "eigenvector" with KR normalisation for every chromosome
# arm of Kc167 and BG3, first at 10000 bp and then at 5000 bp. Each result
# goes to ~/<sample>/<sample>_eigen_<10Kb|5Kb>_<chromosome>.txt.
eigen_jar=~/myJuicerDir/scripts/common/juicer_tools.jar
for resolution in 10000 5000; do
  res_label="$((resolution / 1000))Kb"
  for sample in Kc167_HiC BG3_HiC; do
    for chrom in 2L 2R 3L 3R 4 X; do
      java -Xmx10g -jar "${eigen_jar}" eigenvector \
        -p KR ~/"${sample}/aligned/${sample}.hic" \
        "${chrom}" BP "${resolution}" \
        ~/"${sample}/${sample}_eigen_${res_label}_${chrom}.txt"
    done
  done
done


################################################################################
# plots
################################################################################

# Plot the BG3 TAD tracks for region 2L:12350000-12500000.
# The loop-calling section leaves the shell in ~/BG3_HiC, while the plots/
# folder this command writes to is created only inside ~/BG3_Kc167_HiC, so
# return to the project directory first.
# NOTE(review): tracks_toPlot/ is assumed to live in the project directory as
# well -- confirm.
cd ~/BG3_Kc167_HiC || exit 1
hicPlotTADs --tracks tracks_toPlot/TADs_track_BG3_150bp_diff_min.ini \
  --region 2L:12350000-12500000 --fontSize 14 --dpi 75 \
  -out plots/TADs_track_BG3_only_150bp_2L_12350000_12500000.png \
  -t "HiC (2L:12350000-12500000)"
