# Convert the gzipped cluster list into the 11-column, tab-separated EDGE layout.
#   input : c1 <TAB> chrA:posA <TAB> chrB:posB <TAB> c4   (chromosomes are numbers)
#   output: chrA posA posB c1 c4 0 0 0.0 0 chrB EDGE
# (c1 is presumably the orientation type code and c4 the support count -- confirm.)
# sed splits each "chr:pos" pair into two columns; awk renames chromosome codes
# 23/24 to X/Y.  Both ends are renamed: previously only the first end was, so a
# partner on 23/24 was written as chr23/chr24 while the first end said chrX/chrY.
zcat clusters_mq10_s10_v2.gz \
  | sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)/\1\t\2\t\3\t\4\t\5\t\6/g' \
  | awk '
      {
        first = $2
        second = $4
        if (first == 23) { first = "X" } else if (first == 24) { first = "Y" }
        if (second == 23) { second = "X" } else if (second == 24) { second = "Y" }
        print "chr" first "\t" $3 "\t" $5 "\t" $1 "\t" $6 "\t0\t0\t0.0\t0\tchr" second "\tEDGE"
      }
    ' > clusters_mq10_s10_v2_formatted



# Filter (stdin -> stdout).  Rewrites lines of the form
#   type <TAB> chrA:posA <TAB> chrB:posB <TAB> count <TAB> rest
# as the tab-separated record "chrA posA strandA chrB posB strandB".
# Chromosome codes 23/24 become X/Y.  The type code selects the strand pair:
#   1 -> - -    2 -> + -    3 -> - +    anything else -> + +
sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)\t\(.*\)/\2\t\3\t\4\t\5\t\1\t\6/g' \
  | awk '
      {
        left = $1
        right = $3
        if (left == 23) { left = "X" } else if (left == 24) { left = "Y" }
        if (right == 23) { right = "X" } else if (right == 24) { right = "Y" }
        strand_a = ($5 == 1 || $5 == 3) ? "-" : "+"
        strand_b = ($5 == 1 || $5 == 2) ? "-" : "+"
        print "chr" left "\t" $2 "\t" strand_a "\tchr" right "\t" $4 "\t" strand_b
      }
    '

# Filter (stdin -> stdout).  Same conversion as the strand-only variant, but the
# count column is kept as a seventh output field:
#   type <TAB> chrA:posA <TAB> chrB:posB <TAB> count <TAB> rest
#     -> chrA posA strandA chrB posB strandB count
# Chromosome codes 23/24 become X/Y.  Type code to strands:
#   1 -> - -    2 -> + -    3 -> - +    anything else -> + +
sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)\t\(.*\)/\2\t\3\t\4\t\5\t\1\t\6/g' \
  | awk '
      {
        left = $1
        right = $3
        if (left == 23) { left = "X" } else if (left == 24) { left = "Y" }
        if (right == 23) { right = "X" } else if (right == 24) { right = "Y" }
        strand_a = ($5 == 1 || $5 == 3) ? "-" : "+"
        strand_b = ($5 == 1 || $5 == 2) ? "-" : "+"
        print "chr" left "\t" $2 "\t" strand_a "\tchr" right "\t" $4 "\t" strand_b "\t" $6
      }
    '


# Run overlap.py on /tmp/n.gz and /tmp/t.gz with a 1000 argument (presumably the
# normal/tumour subtraction window in bp -- confirm against overlap.py).
python2.6 overlap.py 1000 /tmp/n.gz /tmp/t.gz > /tmp/out

# Drop every call with an end inside a centromere interval and write the
# survivors to filtered_greedy.txt as: chrA posA strandA chrB posB strandB count.
#   1. keep records with more than 4 fields and widen end A to a 1 bp interval
#   2. intersectBed -v removes records whose end A overlaps centromeres_merged
#   3. swap the ends so end B leads, widened to 1 bp, and filter again
#   4. restore the original column order, without the helper columns
awk 'BEGIN { OFS = "\t" } NF > 4 { print $1, $2, $2 + 1, $3, $4, $5, $6, $7 }' /tmp/out \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk 'BEGIN { OFS = "\t" } { print $5, $6, $6 + 1, $7, $1, $2, $4, $8 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk 'BEGIN { OFS = "\t" } { print $5, $6, $7, $1, $2, $4, $8 }' > filtered_greedy.txt
# Build one 1000 bp interval per distinct breakpoint end in filtered_greedy.txt.
#   - each call contributes two ends: (chrA posA strandA) and (chrB posB strandB)
#   - duplicates are removed
#   - "-" ends get [pos, pos+1000]; all other ends get [pos-1000, pos]
# The start is clamped to 1: a breakpoint closer than 1000 bp to the chromosome
# start used to yield a negative coordinate, which the region-building regex of
# the following samtools loop cannot parse.
grep chr filtered_greedy.txt \
  | awk '{ print $1 "\t" $2 "\t" $3; print $4 "\t" $5 "\t" $6 }' \
  | sort | uniq \
  | awk '
      $3 == "-" { print $1 "\t" $2 "\t" ($2 + 1000); next }
      {
        start = $2 - 1000
        if (start < 1) { start = 1 }
        print $1 "\t" start "\t" $2
      }
    ' > dipg29t_removed1000n_nocentro_intervals.txt
# For every interval line (chr start end) run "samtools view" on the matching
# chr:start-end region of ../merged.sorted.bam, echoing the interval to stderr
# as a progress trace.  The collected records are sorted (5g sort buffer) and
# de-duplicated into uniq.sam, since overlapping regions return the same reads.
while read line; do
  echo $line 1>&2
  region=$(echo $line | sed 's/\(chr\S*\)\s*\([0-9]*\)\s*\([0-9]*\)/\1:\2-\3/g')
  ~/samtools view ../merged.sorted.bam $region
done < dipg29t_removed1000n_nocentro_intervals.txt \
  | sort -S 5g \
  | uniq > uniq.sam

# Turn the call list into link records (presumably for Circos, given the output
# name and the hs chromosome prefix): sort by column 7 ascending, rename every
# "chr" to "hs", and pad both breakpoints by 100 bp on each side:
#   hsA posA-100 posA+100 hsB posB-100 posB+100
sort -k 7,7n dipg29t/n3t10_nocentro_bigger1000 \
  | sed 's/chr/hs/g' \
  | awk 'BEGIN { OFS = "\t" } { print $1, $2 - 100, $2 + 100, $4, $5 - 100, $5 + 100 }' \
      > dipg29t/n3t10_nocentro_bigger1000_circos

# Link records with a thickness attribute, built from the refined, arc-bounded
# calls: sort by column 7 ascending, rename "chr" to "hs", pad both breakpoints
# by 100 bp, and append thickness = column7 / 2 with a floor of 2.
#
# The old guard "if ($t>=2)" tested FIELD number t rather than the variable t,
# so rows were dropped depending on whatever column t happened to point at
# (a strand column, or a column that does not exist for high support).  With
# the floor applied t is always >= 2, so every row is now written.
#
# NOTE(review): this writes to n3t10_nocentro_bigger1000_circos, the same file
# the bigger1000 link command produces -- confirm the overwrite is intended.
sort -k 7,7n dipg29t/n3t10_nocentro_refined_arc_bounded \
  | sed 's/chr/hs/g' \
  | awk '
      {
        thickness = $7 / 2
        if (thickness < 2) { thickness = 2 }
        print $1 "\t" ($2 - 100) "\t" ($2 + 100) "\t" $4 "\t" ($5 - 100) "\t" ($5 + 100) "\tthickness=" thickness
      }
    ' > dipg29t/n3t10_nocentro_bigger1000_circos


# Preview: print the first two refined calls in the tab-separated EDGE layout
#   chrA posA posB type count 0 0 0 0 chrB EDGE
# The type code is derived from the two strand columns:
#   + + -> 0    - - -> 1    + - -> 2    - + -> 3
head -n 2 dipg29t/n3t10_nocentro_refined_arc_bounded \
  | awk '
      BEGIN { OFS = "\t" }
      {
        if ($3 == "+") {
          kind = ($6 == "+") ? 0 : 2
        } else {
          kind = ($6 == "-") ? 1 : 3
        }
        print $1, $2, $5, kind, $7, 0, 0, 0.0, 0, $4, "EDGE"
      }
    '


# Print the refined call list in the tab-separated EDGE layout
#   chrA posA posB type count 0 0 0 0 chrB EDGE
# sed first splits "chrA:posA<strand> chrB:posB<strand>" pairs into six columns
# (lines that are already column-separated pass through unchanged).
# Strand pair to type code:  + + -> 0    - - -> 1    + - -> 2    - + -> 3
sed 's/\(chr[^:]*\):\([0-9]*\)\([+-]\)\s\(chr[^:]*\):\([0-9]*\)\([+-]\)/\1\t\2\t\3\t\4\t\5\t\6/g' /dupa-filer/misko/dipg29t/n3t10_nocentro_refined \
  | awk '
      BEGIN { OFS = "\t" }
      {
        if ($3 == "+") {
          edge_type = ($6 == "+") ? 0 : 2
        } else {
          edge_type = ($6 == "+") ? 3 : 1
        }
        print $1, $2, $5, edge_type, $7, 0, 0, 0.0, 0, $4, "EDGE"
      }
    '


 # Export every data_x*.svg in the current directory to type1_<n>.png on a white
 # background, numbering the PNGs 0, 1, 2, ... in glob order.
 a=0
 for x in data_x*.svg; do
   # When nothing matches, the pattern itself is handed to the loop; skip it so
   # inkscape is not run on a file that does not exist and the counter stays 0.
   [ -e "$x" ] || continue
   # Quoted so that file names containing spaces reach inkscape as one argument.
   /Applications/Inkscape.app/Contents/Resources/bin/inkscape --export-png "type1_${a}.png" --export-background=white "$x"
   a=$((a + 1))
 done





# Take the output of make_clusters and produce the normal-filtered (1000 bp
# overlap.py argument), centromere-subtracted call set for DIPG01T:
#   1. overlap.py compares the normal and tumour cluster files
#   2. records with more than 4 fields are kept; end A is widened to 1 bp and
#      records overlapping centromeres_merged are removed (intersectBed -v)
#   3. the ends are swapped and the same centromere filter is applied to end B
#   4. the original column order is restored
#   5. calls whose two ends share chromosome and strand and lie less than
#      2000 bp apart are discarded
python2.6 ../../overlap.py 1000 ../../DIPG01N/clusters/clusters.txt_f.gz clusters_mq10_cov10.txt_f.gz \
  | awk 'BEGIN { OFS = "\t" } NF > 4 { print $1, $2, $2 + 1, $3, $4, $5, $6, $7 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk 'BEGIN { OFS = "\t" } { print $5, $6, $6 + 1, $7, $1, $2, $4, $8 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk 'BEGIN { OFS = "\t" } { print $5, $6, $7, $1, $2, $4, $8 }' \
  | awk '
      {
        dist = $2 - $5
        if (dist < 0) { dist = -dist }
        if (!($3 == $6 && $1 == $4 && dist < 2000)) { print $0 }
      }
    ' > dipg01t_nsubtract_centrosubtract_1000bp
# Collect the reads around every breakpoint end of the DIPG01T call set into a
# small sorted BAM (uniq_sorted.bam).
#   1. save the BAM header of ../merged.sorted.bam to sam_headers
#   2. list the distinct ends; the strand of end A is flipped, end B is kept
#   3. "-" ends get [pos-1200, pos+200]; other ends get [pos-200, pos+1200]
#   4. samtools view extracts each region (interval echoed to stderr as a trace)
#   5. records are sorted and de-duplicated, the header is put back in front,
#      and the result is converted to BAM and sorted
# The interval start is clamped to 1: breakpoints near a chromosome start used
# to produce a negative coordinate and therefore a malformed region string.
~/samtools view -H ../merged.sorted.bam > sam_headers

grep chr dipg01t_nsubtract_centrosubtract_1000bp \
  | awk '
      BEGIN { OFS = "\t" }
      {
        if ($3 == "+") { print $1, $2, "-" } else { print $1, $2, "+" }
        if ($6 == "+") { print $4, $5, "+" } else { print $4, $5, "-" }
      }
    ' \
  | sort | uniq \
  | awk '
      BEGIN { OFS = "\t" }
      {
        if ($3 == "-") { lo = $2 - 1200; hi = $2 + 200 } else { lo = $2 - 200; hi = $2 + 1200 }
        if (lo < 1) { lo = 1 }
        print $1, lo, hi
      }
    ' \
  | while read -r chrom start end; do
      echo "$chrom $start $end" 1>&2
      ~/samtools view ../merged.sorted.bam "${chrom}:${start}-${end}"
    done \
  | sort -S 5g | uniq \
  | cat sam_headers - \
  | ~/samtools view -Sb -o uniq.bam -

~/samtools sort uniq.bam uniq_sorted
# Stream the extracted reads (uniq_sorted.bam, as SAM text) into cluster_em
# together with the call list and the hg19 reference, and save its output as
# the _emd file.  327 and 57 are presumably the insert-size mean and standard
# deviation for this sample -- confirm against mean_and_std.txt.
~/samtools view uniq_sorted.bam \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/minichr/clustering/cluster_em 327 57 \
      dipg01t_nsubtract_centrosubtract_1000bp \
      /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/indexes/nt_hg19/hg19.fa \
      > dipg01t_nsubtract_centrosubtract_1000bp_emd
# Merge the original calls with the cluster_em output.  Both files are sorted
# together so that records sharing the first six columns become adjacent; for
# such a pair only the larger last-column value is kept, while unpaired records
# are passed through unchanged.  Output: six tab-separated key columns, a
# space, then the score.
# The END rule flushes the record still pending at end of input: without it
# the last unpaired call in sort order was silently dropped.
sort dipg01t_nsubtract_centrosubtract_1000bp dipg01t_nsubtract_centrosubtract_1000bp_emd \
  | awk '
      {
        key = $1 "," $2 "," $3 "," $4 "," $5 "," $6
        if (key == pending) {
          best = $NF
          if (!(best > pending_score)) { best = pending_score }
          print key, best
          pending = ""
        } else {
          if (pending != "") { print pending, pending_score }
          pending = key
          pending_score = $NF
        }
      }
      END { if (pending != "") { print pending, pending_score } }
    ' \
  | sed 's/,/\t/g' > dipg01t_nsubtract_centrosubtract_1000bp_joined
# Rewrite the joined calls in the tab-separated EDGE layout
#   chrA posA posB type count 0 0 0 0 chrB EDGE
# sed splits any "chrA:posA<strand> chrB:posB<strand>" pair into six columns;
# lines that are already column-separated pass through unchanged.
# Strand pair to type code:  + + -> 0    - - -> 1    + - -> 2    - + -> 3
sed 's/\(chr[^:]*\):\([0-9]*\)\([+-]\)\s\(chr[^:]*\):\([0-9]*\)\([+-]\)/\1\t\2\t\3\t\4\t\5\t\6/g' dipg01t_nsubtract_centrosubtract_1000bp_joined \
  | awk '
      BEGIN { OFS = "\t" }
      {
        if ($3 == "+") {
          edge_type = ($6 == "+") ? 0 : 2
        } else {
          edge_type = ($6 == "+") ? 3 : 1
        }
        print $1, $2, $5, edge_type, $7, 0, 0, 0.0, 0, $4, "EDGE"
      }
    ' > dipg01t_nsubtract_centrosubtract_1000bp_joined_f

# Run the hmm tool on the EDGE file plus the tumour and normal q10 coverage
# files; its stdout is saved as hmm_out.
../../scripts/minichr/hmm \
  dipg01t_nsubtract_centrosubtract_1000bp_joined_f \
  ../merged.sorted.bam_q10.cov \
  ../../DIPG01N/merged.sorted.bam_q10.cov > hmm_out


# List every distinct breakpoint position (chr <TAB> pos) that appears as
# either end of a joined call.
grep chr dipg01t_nsubtract_centrosubtract_1000bp_joined \
  | awk '{ print $1 "\t" $2; print $4 "\t" $5 }' \
  | sort \
  | uniq > dipg01t_nsubtract_centrosubtract_1000bp_joined_intervals

# Feed the extracted reads to arc_coverage together with that position list.
# 327 and 57 are presumably the insert-size mean and standard deviation --
# confirm.  The "coveage" spelling of the output name is kept as is.
~/samtools view uniq_sorted.bam \
  | ../../scripts/minichr/arc_coverage/arc_coverage 327 57 \
      dipg01t_nsubtract_centrosubtract_1000bp_joined_intervals \
      > dipg01t_nsubtract_centrosubtract_1000bp_joined_arc_coveage

# Batch version of the single-sample DIPG01 recipe.  For every sample directory
# named in ./tumor (names start with DIPG<two digits>):
#   1. normal-subtract, centromere-filter and distance-filter the calls
#   2. extract the reads around each breakpoint end into clusters/uniq_sorted.bam
#   3. run cluster_em with the mean/std read from <sample>/mean_and_std.txt
#   4. join the original calls with the cluster_em output, keeping the larger
#      score when a call appears in both
#
# Fixes relative to the former one-liner:
#   - the command was broken across two lines after "cat ", so cat consumed the
#     rest of the tumor list from stdin and the calls file was then executed as
#     a command; the join step is now a single pipeline
#   - the join awk flushes its pending record in END instead of dropping the
#     last unpaired call
#   - interval starts are clamped to 1 so breakpoints near a chromosome start
#     no longer yield negative coordinates and malformed samtools regions
cat tumor | while read -r line; do
  echo "$line"
  s=$(echo "$line" | sed 's/DIPG\([0-9][0-9]\).*/\1/g')
  calls="$line/dipg${s}t_nsubtract_centrosubtract_1000bp"

  # Step 1: the N*/T* globs are deliberately unquoted so the shell expands them.
  python2.6 ./overlap.py 1000 DIPG${s}N*/clusters/clusters.txt_f.gz DIPG${s}T*/clusters/clusters_mq10_cov10.txt_f.gz \
    | awk '{if (NF>4) {print $0}}' \
    | awk '{print $1"\t"$2"\t"$2+1"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7}' \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
    | awk '{print $5"\t"$6"\t"$6+1"\t"$7"\t"$1"\t"$2"\t"$4"\t"$8}' \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
    | awk '{print $5"\t"$6"\t"$7"\t"$1"\t"$2"\t"$4"\t"$8}' \
    | awk '{d=$2-$5; if (d<0) {d=-d}; if ($3==$6 && $1==$4 && d<2000) { } else {print $0}}' \
    > "$calls"

  # Step 2: header, per-end regions, read extraction, de-duplication, BAM.
  ~/samtools view -H "$line/merged.sorted.bam" > "$line/sam_headers"
  grep chr "$calls" \
    | awk '{OFS="\t"; if ($3=="+") {print $1,$2,"-"} else {print $1,$2,"+"}; if ($6=="+") {print $4,$5,"+"} else {print $4,$5,"-"};}' \
    | sort | uniq \
    | awk '
        BEGIN { OFS = "\t" }
        {
          if ($3 == "-") { lo = $2 - 1200; hi = $2 + 200 } else { lo = $2 - 200; hi = $2 + 1200 }
          if (lo < 1) { lo = 1 }
          print $1, lo, hi
        }
      ' \
    | while read -r chrom start end; do
        ~/samtools view "$line/merged.sorted.bam" "${chrom}:${start}-${end}"
      done \
    | sort -S 5g | uniq \
    | cat "$line/sam_headers" - \
    | ~/samtools view -Sb -o "$line/clusters/uniq.bam" -
  ~/samtools sort "$line/clusters/uniq.bam" "$line/clusters/uniq_sorted"

  # Step 3: mean and std are the first and second columns of mean_and_std.txt.
  mean=$(awk '{print $1}' "$line/mean_and_std.txt")
  std=$(awk '{print $2}' "$line/mean_and_std.txt")
  ~/samtools view "$line/clusters/uniq_sorted.bam" \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/minichr/clustering/cluster_em "$mean" "$std" "$calls" /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/indexes/nt_hg19/hg19.fa \
    > "${calls}_emd"

  # Step 4: adjacent records with the same six key columns are one call.
  cat "$calls" "${calls}_emd" \
    | sort \
    | awk '
        {
          key = $1 "," $2 "," $3 "," $4 "," $5 "," $6
          if (key == pending) {
            best = $NF
            if (!(best > pending_score)) { best = pending_score }
            print key, best
            pending = ""
          } else {
            if (pending != "") { print pending, pending_score }
            pending = key
            pending_score = $NF
          }
        }
        END { if (pending != "") { print pending, pending_score } }
      ' \
    | sed 's/,/\t/g' > "${calls}_joined"
done
# Dry run: for every sample named in ./tumor, print the sample name followed by
# the qsub command line that would run run_hmm.sh (64g h_vmem) on that sample.
# Nothing is submitted -- the command is only echoed.  The T*/N* and *joined
# patterns are left unquoted, so they are expanded when matching paths exist.
cat tumor | while read line; do
  echo $line
  s=$(echo $line | sed 's/DIPG\([0-9][0-9]\).*/\1/g')
  wgs=/hpf/largeprojects/ccm/amorrison/DIPG_WGS
  echo qsub -l h_vmem=64g $wgs/run_hmm.sh \
    $wgs/DIPG${s}T*/dipg${s}t_*joined \
    $wgs/DIPG${s}T*/merged.sorted.bam_q10.cov \
    $wgs/DIPG${s}N*/merged.sorted.bam_q10.cov \
    $wgs/$line/hmm_out
done
