(1) CTCF

#Download
# Fetch both CTCF runs with the locally installed fastq-dump; the same flags
# are applied to every accession.
FASTQ_DUMP=/data/users/liaoy12/Softwares/sratoolkit.2.9.2-centos_linux64/bin/fastq-dump.2.9.2
for acc in SRR066846 SRR066849; do
    "$FASTQ_DUMP" --split-3 --gzip "$acc"
done

#Mapping
module load bowtie2/2.2.7
module load samtools
export REF=/data/users/liaoy12/liaoy12/Pseudoobscura/ChIP/CTCF/Dpseudo_PacBioV2_genomic.fasta
export FQ1=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/CTCF/SRR066846.fastq.gz
export FQ2=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/CTCF/SRR066849.fastq.gz
# Build the bowtie2 index once; the alignments below refer to it by the prefix "dpse".
bowtie2-build "$REF" dpse
# Align, convert to BAM, sort and index each run.
# The loop iterates over the *values* of FQ1/FQ2: bash cannot assemble a
# variable name with "$FQ${number}" (that expands the unset variable FQ and
# then appends the digit, yielding "1.sorted.bam"), so the paths are listed
# explicitly. All outputs are written next to the input FASTQ.
for fq in "$FQ1" "$FQ2"; do
    bowtie2 -x dpse -U "$fq" -D 20 -R 3 -N 0 -L 20 -i S,1,0.50 -S "$fq.sam"
    samtools view -bS "$fq.sam" > "$fq.bam"
    samtools sort -@ 48 "$fq.bam" -o "$fq.sorted.bam"
    samtools index "$fq.sorted.bam"
done

#Peak calling
# Call CTCF peaks with macs2, using input.bam as the control sample.
# NOTE(review): CTCF.bam and input.bam are not produced under these names by
# the mapping commands above (those write $FQ1.sorted.bam / $FQ2.sorted.bam);
# presumably they are renamed copies of the sorted ChIP and input alignments --
# confirm which SRR run is which before re-running.
module load macs2/2.0.10
macs2 callpeak -t CTCF.bam -c input.bam -f BAM -g 1.5e8 -n CTCF -q 0.01

#Calculating the distribution of binding sites within 40kb around the TAD boundaries at 1kb window
# NOTE(review): CTCF.peaks.list is not created by any command shown here;
# presumably it is derived from the macs2 output -- confirm.
perl ChIPoverlapTAD.pl Dpse_hicexplorer.domains.bed CTCF.peaks.list > HiCExploer.CTCF.list
# SumDistribution.pl must read exactly the file written by the previous command.
perl SumDistribution.pl HiCExploer.CTCF.list > HiCExplorer.CTCF.final.list

(2) BEAF

#Download data from NCBI
# Fetch both BEAF runs with the locally installed fastq-dump; the same flags
# are applied to every accession.
FASTQ_DUMP=/data/users/liaoy12/Softwares/sratoolkit.2.9.2-centos_linux64/bin/fastq-dump.2.9.2
for acc in SRR407391 SRR407390; do
    "$FASTQ_DUMP" --split-3 --gzip "$acc"
done

#Mapping
module load bowtie2/2.2.7
module load samtools
export REF=/data/users/liaoy12/liaoy12/Pseudoobscura/ChIP/CTCF/Dpseudo_PacBioV2_genomic.fasta
export FQ1=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/BEAF/SRR407390.fastq.gz
export FQ2=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/BEAF/SRR407391.fastq.gz
# Build the bowtie2 index; the alignments below refer to it by the prefix "dpse".
bowtie2-build "$REF" dpse
# Align, convert to BAM, sort and index each run.
# The loop iterates over the *values* of FQ1/FQ2: bash cannot assemble a
# variable name with "$FQ${number}" (that expands the unset variable FQ and
# then appends the digit, yielding "1.sorted.bam"), so the paths are listed
# explicitly. All outputs are written next to the input FASTQ.
for fq in "$FQ1" "$FQ2"; do
    bowtie2 -x dpse -U "$fq" -D 20 -R 3 -N 0 -L 20 -i S,1,0.50 -S "$fq.sam"
    samtools view -bS "$fq.sam" > "$fq.bam"
    samtools sort -@ 48 "$fq.bam" -o "$fq.sorted.bam"
    samtools index "$fq.sorted.bam"
done
# Peak calling
# Call BEAF peaks with macs2 (input.bam as control), then pass a peak list and
# the TAD domains to ChIPoverlapTAD.pl and its result to SumDistribution.pl.
# NOTE(review): BEAF.bam, input.bam and BEAF.peak.list are not produced under
# these names by any command shown here; presumably renamed/derived from the
# sorted alignments and the macs2 output -- confirm before re-running.
module load macs2/2.0.10
macs2 callpeak -t BEAF.bam -c input.bam -f BAM -g 1.5e8 -n BEAF -q 0.01
perl ChIPoverlapTAD.pl Dpse_hicexplorer.domains.bed BEAF.peak.list > BEAF.list
perl SumDistribution.pl BEAF.list > BEAF.final.list

(3) H3K4me3 and H3K27me3
  1. Mapping
# Map the six histone-mark / input runs, then merge them per sample.
module load bowtie2/2.2.7
module load samtools

# Run-to-sample assignment follows the merge commands further down:
#   H3K27me3: FQ1 + FQ4    H3K4: FQ2 + FQ5    input: FQ6 + FQ3
export FQ1=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552256.fastq.gz
export FQ2=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552260.fastq.gz
export FQ3=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552261.fastq.gz
export FQ4=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552273.fastq.gz
export FQ5=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552277.fastq.gz
export FQ6=/data/users/liaoy12/liaoy12/Data/Dpseu/CHIP_seq/H3K27me3/SRR1552278.fastq.gz

# Thread count comes from the scheduler's NSLOTS; fall back to one thread so
# the commands still run when NSLOTS is unset (e.g. outside a batch job).
THREADS=${NSLOTS:-1}

# Align, convert to BAM, sort and index each run; outputs are written next to
# the input FASTQ.
# NOTE(review): the "dpse" bowtie2 index is not built in this section; it is
# expected to exist already (bowtie2-build is run in the CTCF/BEAF sections).
for fq in "$FQ1" "$FQ2" "$FQ3" "$FQ4" "$FQ5" "$FQ6"; do
    bowtie2 -x dpse -U "$fq" -D 20 -R 3 -N 0 -L 20 -i S,1,0.50 -S "$fq.sam"
    samtools view -bS "$fq.sam" > "$fq.bam"
    samtools sort -@ "$THREADS" "$fq.bam" -o "$fq.sorted.bam"
    samtools index "$fq.sorted.bam"
done

# Merge the two runs of each sample. The inputs are addressed through the FQ
# variables so the merge finds the sorted BAMs regardless of the current
# directory (they were written alongside the FASTQ files above).
samtools merge H3K27me3.merge.bam "$FQ1.sorted.bam" "$FQ4.sorted.bam"
samtools merge H3K4.merge.bam "$FQ2.sorted.bam" "$FQ5.sorted.bam"
samtools merge input.merge.bam "$FQ6.sorted.bam" "$FQ3.sorted.bam"

samtools sort -@ "$THREADS" H3K27me3.merge.bam -o H3K27me3.merge.sorted.bam
samtools sort -@ "$THREADS" H3K4.merge.bam -o H3K4.merge.sorted.bam
samtools sort -@ "$THREADS" input.merge.bam -o input.merge.sorted.bam

samtools index H3K27me3.merge.sorted.bam
samtools index H3K4.merge.sorted.bam
samtools index input.merge.sorted.bam

 2. Convert to bigWig files
# log2(ChIP / input) signal in 10 bp bins, written as bedGraph.
# The H3K27me3 command reads H3K27me3.merge.sorted.bam, which is the name the
# merge/sort step actually writes.
bamCompare -b1 H3K4.merge.sorted.bam -b2 input.merge.sorted.bam \
    --binSize 10 --operation log2 --minMappingQuality 30 \
    --skipNonCoveredRegions --ignoreDuplicates \
    --numberOfProcessors $NSLOTS -o H3K4.bed -of bedgraph

bamCompare -b1 H3K27me3.merge.sorted.bam -b2 input.merge.sorted.bam \
    --binSize 10 --operation log2 --minMappingQuality 30 \
    --skipNonCoveredRegions --ignoreDuplicates \
    --numberOfProcessors $NSLOTS -o H3K27me3.bed -of bedgraph

# Sequence sizes for bedGraphToBigWig.
faSize -detailed /data/users/liaoy12/liaoy12/Pseudoobscura/ChIP/CTCF/Dpseudo_PacBioV2_genomic.fasta > Dpse.sizes
# Drop every bedGraph line containing "tig" (presumably unplaced contigs --
# confirm against the sequence names in the reference).
grep -v tig H3K4.bed > H3K4.chr.bed
grep -v tig H3K27me3.bed > H3K27.chr.bed
/data/apps/enthought_python/7.3.2/bin/bedGraphToBigWig H3K4.chr.bed Dpse.sizes H3K4.NEW.bw
/data/apps/enthought_python/7.3.2/bin/bedGraphToBigWig H3K27.chr.bed Dpse.sizes H3K27.NEW.bw

 3. Enrichment analysis and permutation

#!/bin/bash
#$ -N map_sub
#$ -q jje128
#$ -pe openmp 1
#$ -t 1-100
# Array job submission script: the "#$" lines above are scheduler directives
# ("-t 1-100" requests tasks 1..100). Each task forwards its own SGE_TASK_ID
# to permutation.pl as the task number.
# NOTE(review): permutation.pl is presumably one of the Perl scripts listed
# below, which run 100 seeded shuffles per task number -- confirm which
# variant is saved under that name.

module load anaconda/3.7-5.3.0
module load bedtools
num=$SGE_TASK_ID

perl permutation.pl Dpse_hicexplorer.boundaries.bed $num


#Perl scripts for permutation

####For CTCF and BEAF
#!/usr/local/bin/perl 
use strict;
use warnings;

# Permutation test for ChIP peaks around shuffled boundary regions.
#
# Usage: perl permutation.pl <regions.bed> <task> [peaks.list] [out.txt]
#   <regions.bed>  regions handed to `bedtools shuffle -i`
#   <task>         task number N; seeds (N-1)*100+1 .. (N-1)*100+100 are used
#   [peaks.list]   peak file passed to chip_overlap40kb.pl
#                  (default: BEAF.peak.list)
#   [out.txt]      file the overlap output is appended to
#                  (default: BEAF.simulate.txt)
#
# For every seed the regions are shuffled, chip_overlap40kb.pl is run on the
# shuffled regions, and the temporary shuffled BED file is removed.
#
# NOTE(review): output is appended with ">>", so tasks that run at the same
# time with the same out.txt write into one shared file -- confirm that
# interleaved appends are acceptable, or pass a per-task out.txt.

die "Usage: $0 <regions.bed> <task> [peaks.list] [out.txt]\n" unless @ARGV >= 2;

my ($regions, $task, $peaks, $out) = @ARGV;
$peaks = 'BEAF.peak.list'    unless defined $peaks;
$out   = 'BEAF.simulate.txt' unless defined $out;

my $first_seed = ($task - 1) * 100 + 1;
for my $seed ($first_seed .. $first_seed + 99) {
    my $shuffled = "$regions.shuffle.$seed.bed";

    # system() instead of void-context backticks, so that a failing command
    # stops the run rather than silently appending results computed from a
    # missing or empty shuffle.
    for my $cmd (
        "bedtools shuffle -i $regions -g Dpse.genome.sizes -excl Dpse.exclude.bed -chrom -seed $seed > $shuffled",
        "perl chip_overlap40kb.pl $shuffled $peaks >> $out",
    ) {
        system($cmd) == 0 or die "Command failed (status $?): $cmd\n";
    }

    unlink $shuffled or warn "Could not remove $shuffled: $!\n";
}

#### For H3K4me3 and H3K27me3 and ATAC
#!/usr/local/bin/perl 
use strict;
use warnings;

# Permutation test for bigWig signal around shuffled boundary regions.
#
# Usage: perl permutation.pl <regions.bed> <task>
#   <regions.bed>  regions handed to `bedtools shuffle -i`
#   <task>         task number N; seeds (N-1)*100+1 .. (N-1)*100+100 are used
#
# For every seed: shuffle the regions, run computeMatrix over the three bigWig
# tracks, decompress the matrix, summarise it with Cal_ratio(), and write one
# tab-separated line (seed followed by the nine values Cal_ratio returns) to
# "<task>.simulate.txt". Temporary files are removed after each seed.

die "Usage: $0 <regions.bed> <task>\n" unless @ARGV >= 2;

my ($regions, $task) = @ARGV;

my $out_name = "$task.simulate.txt";
open my $out, '>', $out_name or die "Cannot write $out_name: $!";

my $first_seed = ($task - 1) * 100 + 1;
for my $i ($first_seed .. $first_seed + 99) {
    my $shuffled = "$regions.shuffle.$i.bed";
    my $matrix   = "${shuffled}_40kb.txt";

    # Each command is a single shell line: a line break inside the command
    # string would make the shell run the remainder as a separate command.
    for my $cmd (
        "bedtools shuffle -i $regions -g Dpse.genome.sizes -excl Dpse.exclude.bed -chrom -seed $i > $shuffled",
        "computeMatrix scale-regions -S H3K4.NEW.bw H3K27.NEW.bw ATAC.bw -R $shuffled"
            . " --beforeRegionStartLength 40000 --afterRegionStartLength 40000"
            . " --regionBodyLength 5000 --binSize 1000 --skipZeros -o $matrix.gz",
        "gunzip $matrix.gz",
    ) {
        system($cmd) == 0 or die "Command failed (status $?): $cmd\n";
    }

    my ($up1, $b1, $down1, $up2, $b2, $down2, $up3, $b3, $down3) = Cal_ratio($matrix);
    print {$out} "$i\t$up1\t$b1\t$down1\t$up2\t$b2\t$down2\t$up3\t$b3\t$down3\n";

    unlink $matrix, $shuffled;
}

# Buffered write errors only surface when the handle is closed.
close $out or die "Cannot close $out_name: $!";

#my ($up1,$b1,$down1,$up2,$b2,$down2,$up3,$b3,$down3) = &Cal_ratio("Dpse_hic_matrix.40K_binsize.1kb");
#print "Num: $up1\n$b1\n$down1$up2\t$b2\t$down2\t$up3\t$b3\t$down3\n";

# Cal_ratio($file)
#
# Average nine fixed column windows of a whitespace-separated matrix file.
#
# Parameters:
#   $file - path of the text matrix to read.
#
# Returns a list of nine values, in this order:
#   (up1, mid1, down1, up2, mid2, down2, up3, mid3, down3)
# Each value is the mean, over all data lines, of the entries in one column
# window; entries containing "nan" are ignored. A window in which no numeric
# entry was found yields the string 'nan' instead of dividing by zero.
#
# Lines containing "@" are skipped (header lines). Columns missing from a short
# line are ignored rather than being counted as zero.
#
# Dies if the file cannot be opened.
#
# NOTE(review): the 0-based column windows below look like 6 leading
# descriptor columns followed by three consecutive 85-column groups
# (6-90, 91-175, 176-260), with the first 5, the 43rd and the last 5 columns
# of each group being averaged. That presumably matches computeMatrix output
# for three tracks with 40 + 5 + 40 bins -- confirm against the matrix header.
sub Cal_ratio {
    my $file = shift;

    # [first, last] inclusive 0-based column ranges, in return order.
    my @windows = (
        [   6,  10 ], [  48,  48 ], [  86,  90 ],    # group 1: up, mid, down
        [  91,  95 ], [ 133, 133 ], [ 171, 175 ],    # group 2: up, mid, down
        [ 176, 180 ], [ 218, 218 ], [ 256, 260 ],    # group 3: up, mid, down
    );
    my @sum   = (0) x @windows;
    my @count = (0) x @windows;

    open my $in, '<', $file or die "Cannot open $file: $!";
    while (my $line = <$in>) {
        next if $line =~ /\@/;
        chomp $line;
        my @tmp = split /\s+/, $line;

        for my $w (0 .. $#windows) {
            my ($first, $last) = @{ $windows[$w] };
            for my $col ($first .. $last) {
                my $value = $tmp[$col];
                next if !defined $value || $value =~ /nan/;
                $count[$w]++;
                $sum[$w] += $value;
            }
        }
    }
    close $in;

    my @averages = map { $count[$_] ? $sum[$_] / $count[$_] : 'nan' } 0 .. $#windows;
    return @averages;
}





