#!/usr/bin/perl -w

# Autoflush the currently selected output handle (STDOUT) so that progress
# written to STDOUT and STDERR interleaves in order.
$| = 1;

use Getopt::Long;
use POSIX;
use Sort::Naturally;

# Usage text shown whenever the command line is unusable.
$USAGE = "\nUSAGE: MASiVE--epipipe-mapping.pl

    source:s          =>  \$source      21/22/24/meth
    muni:s            =>  \$muni        M/U
    lib:s             =>  \$lib
    lite:s            =>  \$lite        0/1
    rerun:s           =>  \$rerun       0/1
    normtype:s        =>  \$normtype    orig/norm_setloc/norm_genloc
    normfile:s        =>  \$normfile
\n";

# GetOptions returns false when it meets an unknown or malformed option
# (it has already warned on STDERR in that case).
my $options_res = GetOptions(
    "source:s"          =>  \$source,
    "muni:s"            =>  \$muni,
    "lib:s"             =>  \$lib,
    "lite:s"            =>  \$lite,
    "rerun:s"           =>  \$rerun,
    "normtype:s"        =>  \$normtype,
    "normfile:s"        =>  \$normfile
    );

# A mistyped option must not silently fall back to defaults and produce a
# full run with the wrong settings.
unless ($options_res)     { die $USAGE; }

# --source and --lib are mandatory.
unless ($source && $lib) { die $USAGE; }

# $targets and $inno are not exposed on the command line; they always take
# the defaults below unless set by other means.
unless (defined($targets))      { $targets      = 'SVs'; }
unless (defined($inno))         { $inno         = 'no'; }
unless (defined($muni))         { $muni         = ''; }
unless (defined($lite))         { $lite         = 0; }
unless (defined($rerun))        { $rerun        = 0; }
unless (defined($normtype))     { $normtype     = 'orig'; } # norm_setloc / norm_genloc

# The norm_* modes look every read up in the normalisation file; without it
# no read name can be parsed and every count would be meaningless.
if ($normtype =~ /norm/ && !(defined($normfile) && length($normfile))) {
    die "\n--normfile is required when --normtype is '$normtype'\n$USAGE";
}


if ($targets =~ /SVs/) {

#############
# Sireviruses
#############

    # Optional list of element IDs to keep (first tab-separated column).
    $cleanIDs = "6456_disinfected.SVs";
    #$cleanIDs = "2973_rejected.SVs";
    if (-e $cleanIDs) {
        open(CLEANIDSFILE, $cleanIDs) or print "\tcannot read $cleanIDs\n";
        while (<CLEANIDSFILE>) {
            next unless /\w/;
            chomp;
            @tabs = split /\t/;
            $clean{$tabs[0]} = 1;
        }
        close(CLEANIDSFILE);
    }

    # Optional list of element IDs flagged as 'reject'.
    if (-e "SV2reject") {
        open(LEPRA, "SV2reject") or print "\tcannot read SV2reject\n";
        while (<LEPRA>) {
            next unless /\w/;
            chomp;
            @tabs = split /\t/;
            $lepra{$tabs[0]} = 'reject';
        }
        close(LEPRA);
    }

    # ORF summary: only the '#' lines are used. Column 1 (minus the leading
    # "# ") is stored per element ID (column 3) unless it contains a '+',
    # in which case the element is flagged in %lepra instead.
    my $orfis_path = "Zmay_chr.fasta.clean.MASiVE.internal.fasta.cleaNex10.corr.getorf.orfis.aa.fasta.hmmscanned.domtbl.parsed.multilevel";
    open(ORFIS, $orfis_path) or print "\tcannot read $orfis_path\n";
    while (<ORFIS>) {
        next unless /^#/;
        chomp;
        @tabs = split /\t/;
        $tabs[0] =~ s/# //;
        if ($tabs[0] =~ /\+/) {
            $lepra{$tabs[2]} = 'orfis';
        }
        else {
            $orfis{$tabs[2]} = $tabs[0];
        }
    }
    close(ORFIS);

    # 'in' selects the inBLAST annotation and disables the clean-ID filter;
    # anything else selects noBLAST and keeps the filter on.
    $cleanpass = ($inno eq 'in') ? 1 : 0;
    $gff = $cleanpass
        ? "MASiVEdb.Zea_mays.gff.inBLAST.gff.SS"
        : "MASiVEdb.Zea_mays.gff.noBLAST.gff.SS";

    # Age-class label for the handful of recognised values of matrix column 40.
    my %generation_of = (
        '0'    => 'a_newborn',
        '0.25' => 'a_newborn',
        '1.5'  => 'b_midaged',
        '1.75' => 'b_midaged',
        '>3'   => 'c_veryold',
    );

    # Per-element matrix: family (column 52) and age (column 14) for the
    # elements that pass the filter; everything else is flagged as 'rest'.
    open(MATRIX, "MASiVEdb.Zea_mays.matrix.NEW1") or print "\tcannot read MASiVEdb.Zea_mays.matrix.NEW1\n";
    while (<MATRIX>) {
        next if /^#/;
        chomp;
        @tabs = split /\t/;

        unless ($cleanpass || $clean{$tabs[5]}) {
            $lepra{$tabs[5]} = "rest";
            next;
        }

        delete $lepra{$tabs[5]};
        $fams{$tabs[5]} = $tabs[51];
        $ages{$tabs[5]} = $tabs[13];
        $generation = exists $generation_of{$tabs[39]} ? $generation_of{$tabs[39]} : '';
    }
    close(MATRIX);

    # GFF: record from/to/len (columns 4-6) of every whole element, LTR and
    # insert, keyed by the element ID (first ';'-separated attribute).
    print STDERR "loading $gff\n";
    open(GFF, $gff) or print "\tcannot read $gff\n";
    while (<GFF>) {
        next if /^#/;
        chomp;
        @tabs = split /\t/;
        @split8 = split /;/, $tabs[8];
        next unless $cleanpass || $clean{$split8[0]};

        # Which sub-hash of %coords this feature fills in.
        my $part;
        if    ($tabs[2] eq 'Sirevirus')            { $part = 'SV'; }
        elsif ($tabs[2] eq 'long_terminal_repeat') { $part = $split8[6]; }
        elsif ($tabs[2] eq 'insert')               { $part = 'insert'; }
        else                                       { next; }

        @{ $coords{$split8[0]}{$part} }{qw(from to len)} = @tabs[3, 4, 5];

        # On a first run, remember every position covered by an element so
        # that the raw read files can be filtered against it later.
        if ($part eq 'SV' && !$rerun) {
            $split8[0] =~ /Zmay_chr_(\d+)-/;
            $chr = $1;
            foreach my $pos ($tabs[3] .. $tabs[4]) {
                $tmpmem4flt{"chr$chr"}{$pos} = 1;
            }
        }
        NEXT:
    }
    close(GFF);

}
print STDERR scalar(keys %coords) . " $targets\n";


#############
# methylation
#############

# NOTE: the goto below jumps straight to the SKIP_METH label at the end of
# this section, so none of the methylation code runs as the file stands.
goto SKIP_METH;

# --- Part 1: locate candidate methylation motifs in the element sequences ---
# Reads the element FASTA; for every kept record it scans the sequence for
# CG / CHG / CHH motifs and counts them in %meth{chr}{coordinate}{type}.
$fasta = "MASiVEdb.Zea_mays.gff.noBLAST.fasta.SS";
$inno = 'no';
print STDERR "loading $fasta\n";
open (FASTA, "$fasta") || print "\tcannot read $fasta\n";
while (<FASTA>) {
    chomp;
    if (/^>/) {
        # Header line: the ID decides whether the record is skipped
        # (flagged in %lepra, or absent from %coords) and supplies the
        # chromosome number, the D/P flag and the start coordinate.
        s/^>//;
        $SV = $_;
        $skipSV = 0;
        if (defined $lepra{$_} || !defined $coords{$_}) {
            $skipSV = 1;
        } else {
            /Zmay_chr_(\d+)-([DP])-(\d+)/;
            $chr = "chr$1";
            $dir = $2;
            $SVstart = $3;
        }
    } elsif (!$skipSV && /\S/) {
        # Sequence line of a kept record.
        # NOTE(review): coordinates use length($_) of this single line and
        # $keptSVs is bumped once per line - this assumes each record's
        # sequence sits on one line; TODO confirm.
        # In every loop below, pos() is rewound to one past the start of the
        # match so that overlapping motifs are found too. For 'D' records the
        # coordinate counts forward from $SVstart; otherwise it counts back
        # from $SVstart+length($_)-1.

        #$prev_offset = 0;
        #$mapper = '';
        while (/cg/gi) {
            $offset = pos() - 2;
            if ($dir eq 'D') {
                $c = $SVstart+$offset;
            } else {
                $c = ($SVstart+length($_)-1)-$offset-1;
            }
            ++$meth{$chr}{$c}{'CG+'};
            #print "meth{$chr}{".($SVstart+$offset-1)."}{'CG+'}\n";
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCG+ Cg\t\t$mapper\n";

        # Same pattern as the CG+ loop; only the coordinate differs by one
        # (the other base of the CG pair).
        #$prev_offset = 0;
        #$mapper = '';
        while (/cg/gi) {
            $offset = pos() - 2;
            if ($dir eq 'D') {
                $c = $SVstart+$offset+1;
            } else {
                $c = ($SVstart+length($_)-1)-$offset;
            }
            ++$meth{$chr}{$c}{'CG-'};
            #print "meth{$chr}{".($SVstart-$offset+1)."}{'CG-'}\n";
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCG- cG\t\t$mapper\n";

        # C[atc]G: counted at the first base of the match; labelled CHG+
        # for 'D' records and CHG- otherwise.
        #$prev_offset = 0;
        #$mapper = '';
        while (/c[atc]g/gi) {
            $offset = pos() - 3;
            if ($dir eq 'D') {
                $c = $SVstart+$offset;
                ++$meth{$chr}{$c}{'CHG+'};
            } else {
                $c = ($SVstart+length($_)-1)-$offset;
                ++$meth{$chr}{$c}{'CHG-'};
                #print "meth{$chr}{".(($SVstart+length($_)-1)-$offset)."}{'CHG-'}\n";
            }
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCHG+ C[atc]g\t$mapper\n";

        # C[atg]G: counted at the third base of the match; labels are the
        # mirror image of the previous loop.
        #$prev_offset = 0;
        #$mapper = '';
        while (/c[atg]g/gi) {
            $offset = pos() - 3;
            if ($dir eq 'D') {
                $c = $SVstart+$offset+2;
                ++$meth{$chr}{$c}{'CHG-'};
            } else {
                $c = ($SVstart+length($_)-1)-$offset-2;
                ++$meth{$chr}{$c}{'CHG+'};
                #print "meth{$chr}{".(($SVstart+length($_)-1)-$offset-2)."}{'CHG+'}\n";
            }
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCHG- c[atg]G\t$mapper\n";

        # C[atc][atc]: counted at the first base of the match.
        #$prev_offset = 0;
        #$mapper = '';
        while (/c[atc][atc]/gi) {
            $offset = pos() - 3;
            if ($dir eq 'D') {
                $c = $SVstart+$offset;
                ++$meth{$chr}{$c}{'CHH+'};
            } else {
                $c = ($SVstart+length($_)-1)-$offset;
                ++$meth{$chr}{$c}{'CHH-'};
                #print "meth{$chr}{".(($SVstart+length($_)-1)-$offset)."}{'CHH-'}\n";
            }
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCHH+C[atc][atc]\t$mapper\n";
        
        # [atg][atg]G: counted at the third base of the match.
        #$prev_offset = 0;
        #$mapper = '';
        while (/[atg][atg]g/gi) {
            $offset = pos() - 3;
            if ($dir eq 'D') {
                $c = $SVstart+$offset+2;
                ++$meth{$chr}{$c}{'CHH-'};
            } else {
                $c = ($SVstart+length($_)-1)-$offset-2;
                ++$meth{$chr}{$c}{'CHH+'};
                #print "meth{$chr}{".(($SVstart+length($_)-1)-$offset-2)."}{'CHH+'}\n";
            }
            #$mapper .= (' ' x ($offset-$prev_offset-1)) . '^';
            #$prev_offset = $offset;
            pos() = $offset+1;
        }
        #print "\t\t$_\nCHH-[atg][atg]G\t$mapper\n";

        ++$keptSVs;
    }
}
close FASTA;
print STDERR "studied meth motifs on $keptSVs SVs\n";
# Dump every motif count to STDOUT: chromosome, coordinate, type, count.
foreach $chr (keys %meth) {
    foreach $coord (sort {$a<=>$b} keys %{$meth{$chr}}) {
        foreach $type (nsort keys %{$meth{$chr}{$coord}}) {
            print "$chr\t$coord\t$type\t$meth{$chr}{$coord}{$type}\n";
        }
    }
}

# --- Part 2: filter the per-chromosome call files down to element positions ---
# Pass 1 uses folder zmametP with tag '+', pass 2 folder zmametM with tag '-'.
# Lines whose first column + 1 is a position recorded in %tmpmem4flt are
# copied to sho.flt (prefixed with chromosome and tag) and counted in %meth.
open (SHOFLT, ">sho.flt") || print "\tcannot create sho.flt\n";
for ($i=1;$i<=2;$i++) {
    if ($i==1) {
        $folder = "final.datasets/zmametP";
        $tag = '+';
    } else {
        $folder = "final.datasets/zmametM";
        $tag = '-';
    }
    @files = glob("$folder/chr*.out");
    foreach $file (@files) {
        $file =~ /(chr\d+)\.out/;
        $chr = $1;
        # NOTE(review): $file comes from the glob above and so already starts
        # with $folder; this message prints the folder twice.
        print STDERR "loading Sho $folder/$file\n"; # coords start from zero !
        open (SHO, "$file") || print "\tcannot read $file\n";
        while (<SHO>) {
            # No chomp here: the last field keeps its newline, and $_ is
            # written to sho.flt with its original line ending.
            @tabs = split /\t/;
            if (defined $tmpmem4flt{$chr}{$tabs[0]+1}) {
                print SHOFLT "$chr$tag\t$_";
                $methtype = "$tabs[2]$tag".lc($tabs[1]);
                ++$meth{$chr}{$tabs[0]+1}{$methtype};
                $methtypes{"$tabs[2]$tag"} = 1;
            }
        }
        close SHO;
    }
}
close SHOFLT;
# --- Part 3: read the filtered file back ---
# Columns are shifted by one relative to part 2 because of the chromosome+tag
# prefix written above.
# NOTE(review): the strand captured by the regex is discarded ($tag is forced
# to ''), and these lines were already counted in %meth while being written in
# part 2, so positions are counted again here under untagged type names -
# confirm this is intended before re-enabling the section.
print STDERR "loading Sho sho.flt\n";
open (SHOFLT, "sho.flt") || print "\tcannot read sho.flt\n";
while (<SHOFLT>) {
    chomp;
    @tabs = split /\t/;
    $tabs[0] =~ /(chr\d+)([\+-])/;
    $chr = $1;
    $tag = '';
    $methtype = "$tabs[3]$tag".lc($tabs[2]);
    ++$meth{$chr}{$tabs[1]+1}{$methtype};
    $methtypes{"$tabs[3]$tag"} = 1;
}
close SHOFLT;
SKIP_METH:


########
# siRNAs
########

# For the norm_* modes, load the normalisation table: each non-blank line is
# stored whole in %norm under the text before its first ':'.
if ($normtype =~ /norm/) {
    print STDERR "loading bed norm loc\n";
    open(LOC, "$normfile") or print "\tcannot read $normfile\n";
    while (<LOC>) {
        next unless /\w/;
        chomp;
        @split = split /:/;
        $norm{$split[0]} = $_;
    }
    close(LOC);
}

# --lite switches off per-bin accounting; only read-length classes are kept.
$no_bins_just_len = $lite ? 1 : 0;

# Load the siRNA alignments (BED-like, tab separated) into
# %siRNAs{chr}{position}{type}{hits|exp}.
#
# First run (!$rerun): read the raw multi/unique BED files for the requested
# read length ($ia..$ib index the file list below), keep only reads that touch
# a position recorded in %tmpmem4flt, and write the kept records to a
# conchie.flt.* filter file.
# Rerun: read a single pre-filtered file (index 0) and skip the filtering.
#
# NOTE(review): a $source other than 21/22/24 on a first run leaves $ia/$ib
# undefined; the loop then runs once and falls into the index-0 branch.
# NOTE(review): the filter file is written as
# conchie.flt.$targets$inno$cleanIDs-vs-$lib.$source but the rerun branch
# reads conchie.flt.$lib.$source - confirm the file is renamed in between.
$i = -1;
if (!$rerun) {
    if ($source eq '21') {
        $ia = 1;
        $ib = 2;
    } elsif ($source eq '22') {
        $ia = 3;
        $ib = 4;
    } elsif ($source eq '24') {
        $ia = 5;
        $ib = 6;
    }
    open (CONCHIEFLT, ">conchie.flt.$targets$inno$cleanIDs-vs-$lib.$source") || print "\tcannot create conchie.flt\n";
    select((select(CONCHIEFLT), $| = 1)[0]);   # autoflush the filter file
} else {
    $ia = 0;
    $ib = 0;
}
for ($i=$ia;$i<=$ib;$i++) {
    $bed = 'final_datasetsNOV13/' if $i;
    if ($i==1) {
        $bed .= "B73siRNA_21_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_multi.bed" if $lib eq 'B73siRNA';
    } elsif ($i==2) {
        $bed .= "B73siRNA_21_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_unique.bed" if $lib eq 'B73siRNA';
    } elsif ($i==3) {
        $bed .= "B73siRNA_22_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_multi.bed" if $lib eq 'B73siRNA';
    } elsif ($i==4) {
        $bed .= "B73siRNA_22_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_unique.bed" if $lib eq 'B73siRNA';
    } elsif ($i==5) {
        $bed .= "B73siRNA_24_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_multi.bed" if $lib eq 'B73siRNA';
    } elsif ($i==6) {
        $bed .= "B73siRNA_24_collapsed_rRNAsFiltered_B73genome_bwa_totalHits_sam_unique.bed" if $lib eq 'B73siRNA';
    } elsif ($i==0) {
        $bed = "conchie.flt.$lib.$source";
    }
    print STDERR "loading Conchie bed $bed\n"; # coords start from zero !
    open (CONCHIE, "$bed") || print "\tcannot read $bed\n";
    RECORD: while (<CONCHIE>) {
        next RECORD unless /\w/;
        chomp;
        @tabs = split /\t/;

        # Optional multi/unique filter on column 6.
        next RECORD unless $muni eq '' || $tabs[5] eq $muni;

        # Raw files name chromosomes "chromosome:AGPv2:N:..."; normalise to
        # "chrN". Filtered (rerun) files already carry the short name.
        next RECORD unless $rerun || $tabs[0] =~ /chromosome\:AGPv2\:(\d+)\:/;
        $tabs[0] = "chr$1" if !$rerun;

        # On a first run keep only reads whose start or end touches a
        # recorded element position.
        my ($start_in, $end_in);
        if (!$rerun) {
            $start_in = defined $tmpmem4flt{$tabs[0]}{$tabs[1]+1};
            $end_in   = defined $tmpmem4flt{$tabs[0]}{$tabs[2]+1};
            next RECORD unless $start_in || $end_in;
        }

        $mu = '';
        if ($normtype =~ /norm/) {
            if (defined $norm{$tabs[3]}) {
                $tabs[3] = $norm{$tabs[3]};
            } else {
                print STDERR "no norm info for $tabs[3]\n";
            }
        }

        # Read name layout: <lib>_<length>_<n>-<count>[:<divisor>].
        # \Q..\E keeps any regex metacharacter in $lib literal; a zero
        # divisor is treated as unparseable instead of dying on 1/0.
        if ($normtype =~ /norm/ && $tabs[3] =~ /\Q$lib\E_(\d+)_\d+-(\d+):(\d+)/ && $3 > 0) {
            $len = $1;
            $spe = 1/$3;
            $exp = $2/$3;
        } elsif ($normtype eq 'orig' && $tabs[3] =~ /\Q$lib\E_(\d+)_\d+-(\d+)/) {
            $len = $1;
            $spe = 1;
            $exp = $2;
        } else {
            # Unparseable name: report it and drop the record. Carrying on
            # would add the previous record's $len/$spe/$exp a second time
            # (or undef values on the first record).
            print STDERR " ! $tabs[3]\n";
            next RECORD;
        }

        # In lite mode the strand is ignored, so all reads share one type.
        unless ($no_bins_just_len) {
            $dir = $tabs[4];
        } else {
            $dir = '';
        }
        $siRNAtotals{hits} += $spe;
        $siRNAtotals{exp}  += $exp;

        # '-' strand reads are accumulated with a negative sign.
        # NOTE(review): $c already starts at start+1 and is shifted by one
        # more below, so positions start+2 .. end+1 are filled - confirm this
        # matches the coordinate convention of the input files.
        for $c ($tabs[1]+1..$tabs[2]) {
            if ($dir eq '+' || $dir eq '') {
                $siRNAs{$tabs[0]}{$c+1}{"$len$dir$mu"}{hits} += $spe; # coords start from zero !
                $siRNAs{$tabs[0]}{$c+1}{"$len$dir$mu"}{exp}  += $exp; # coords start from zero !
            } elsif ($dir eq '-') {
                $siRNAs{$tabs[0]}{$c+1}{"$len$dir$mu"}{hits} -= $spe; # coords start from zero !
                $siRNAs{$tabs[0]}{$c+1}{"$len$dir$mu"}{exp}  -= $exp; # coords start from zero !
            }
        }
        $siRNAtypes{"$len$dir$mu"} = "$len$mu";

        if (!$rerun) {
            # A read with exactly one end inside an element overhangs it.
            if ($start_in xor $end_in) {
                ++$overhangs{$bed}{$len};
            }
            # NOTE(review): column 7 is written where the input had column 6,
            # so the --muni filter reads a different column on a rerun -
            # confirm this is intended.
            print CONCHIEFLT "$tabs[0]\t$tabs[1]\t$tabs[2]\t$tabs[3]\t$tabs[4]\t$tabs[6]\n";
        }
    }
    close CONCHIE;
}
# After a first run: close the filter file, log how many reads of each length
# overhang an element boundary per input file, and release the large lookup.
unless ($rerun) {
    close(CONCHIEFLT);

    open(CONCHIELOG, ">conchie.flt.$targets$inno$cleanIDs-vs-$lib.$source.log") or print "\tcannot create conchie.flt.$targets$inno$cleanIDs-vs-$lib.$source.log\n";
    for $bedfile (sort keys %overhangs) {
        for $length (sort keys %{ $overhangs{$bedfile} }) {
            print CONCHIELOG "$overhangs{$bedfile}{$length} occurences of overhanging $length nt siRNAs in $bedfile\n";
        }
    }
    close(CONCHIELOG);

    undef %overhangs;
    undef %tmpmem4flt;
}

# The normalisation table is no longer needed once all reads are loaded.
undef %norm if $normtype =~ /norm/;

# Nothing mapped onto any element: stop before opening the report files.
unless (keys %siRNAs) {
    print STDERR "no data to look at, no kookoobading\n";
    exit;
}

SKIP_SIRNA:


###########
# reporting
###########

# Each region of an element is cut into $bins bins. Bins of different regions
# share one numbering: a region's bins start right after its offset.
$bins = 100;
$bins_offset{'5prime'} = 0;
$bins_offset{internal} = 100;
$bins_offset{'3prime'} = 200;
$bins_offset{insert} = 300;

# Output order of the regions (also used to map a global bin number back to
# a per-region bin number).
$regions2sort{'5prime'} = 1;
$regions2sort{internal} = 2;
$regions2sort{'3prime'} = 3;
$regions2sort{insert} = 4;

# Highest global bin number, i.e. largest offset plus one region's worth of
# bins. The per-bin report loop iterates 1..$maxbins; without this assignment
# the bound is undefined and that loop never runs.
$maxbins = 0;
foreach my $region_offset (values %bins_offset) {
    $maxbins = $region_offset + $bins if $region_offset + $bins > $maxbins;
}

print STDERR "reporting\n";

# The output file names encode the run settings: read length, multi/unique
# filter, lite mode and normalisation type.
$source = $source . $muni;
$source .= 'lite' if $no_bins_just_len;
$normtype = $normtype . 'HD';

# Per-bin table (not produced in lite mode).
if (!$no_bins_just_len) {
    open(BIN, ">MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.bin") or print "\tcannot create MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.bin\n";
    {
        # Switch on autoflush for BIN, then restore the previous default handle.
        my $previous = select(BIN);
        $| = 1;
        select($previous);
    }
    print BIN "inno\tgroups\tSV\tfam\tage\tagegroup.corrected\tenv\td2c\tbin\tregion\tsiRNAcat\tsiRNAtype\thits\tnorm_hits\texp\tnorm_exp\tnorm_ave_exp\tORFis\n";
}

# Per-region table (only when the targets are SVs).
if ($targets eq 'SVs') {
    open(REG, ">MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.reg") or print "\tcannot create MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.reg\n";
    {
        my $previous = select(REG);
        $| = 1;
        select($previous);
    }
    print REG "inno\tgroups\tSV\tfam\tage\tagegroup.corrected\tenv\td2c\tregion\tsiRNAcat\tsiRNAtype\thits\tnorm_hits\texp\tnorm_exp\tnorm_ave_exp\tORFis\n";
}

# Whole-element table (always produced).
open(SV, ">MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.sv") or print "\tcannot create MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.siRNA.$normtype.sv\n";
{
    my $previous = select(SV);
    $| = 1;
    select($previous);
}
print SV "inno\tgroups\tSV\tfam\tage\tagegroup.corrected\tenv\td2c\tregion\tsiRNAcat\tsiRNAtype\thits\tnorm_hits\texp\tnorm_exp\tnorm_ave_exp\tORFis\n";

#open (METHBIN, ">MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.meth.bin") || print "\tcannot create MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.meth.bin\n";
#print METHBIN "inno\tgroups\tSV\tfam\tage\tagegroup.corrected\tenv\td2c\tbin\tregion\tmeth_info\tmeth_motif\tmeth_type\tmeth_count\tmeth_norm\tmeth_rel\n";
#open (METHREG, ">MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.meth.reg") || print "\tcannot create MASiVE.epipipe.$targets$inno$cleanIDs-vs-$lib.$source.meth.reg\n";
#print METHREG "inno\tgroups\tSV\tfam\tage\tagegroup.corrected\tenv\td2c\tregion\tmeth_info\tmeth_motif\tmeth_type\tmeth_count\tmeth_norm\tmeth_rel\n";

foreach $SV (nsort keys %coords) {
    if ($targets eq 'SVs') {
        $SV =~ /Zmay_chr_(\d+)-/;
        $new_chr = $1;
    } else {
        $SV =~ /^chr(\d+)-/;
        $new_chr = $1;
    }
    $chr = "chr$new_chr";
    $prev_chr = $new_chr - 1;
    delete $siRNAs{$prev_chr};

    unless (defined $groups{$SV})   { $groups{$SV} = 'na'; }
    unless (defined $fams{$SV})     { $fams{$SV} = 'na'; }
    $agegroup = 'na';
    unless (defined $ages{$SV})     { $ages{$SV} = 'na'; } #else { $agegroup = $agegroups{$SV};} #sprintf("%1.0f",2 * $ages{$SV}) / 2; }
    unless (defined $envs{$SV})     { $envs{$SV} = 'na'; }
    unless (defined $d2c{$SV})      { $d2c{$SV} = 'na'; }
    unless (defined $orfis{$SV})    { $orfis{$SV} = 'na'; }
    undef %annotation;
    undef %counters;
    undef %per_bin;
    undef %per_reg;
    undef %per_sv;
    undef %bin_sizes;
    undef %bin2annotation;

    $running = 0;
    if ($targets eq 'SVs') {
        for ($c=$coords{$SV}{'5prime'}{from};$c<=$coords{$SV}{'5prime'}{to};$c++) {
            $annotation{$c} = '5prime';
        }
        for ($c=$coords{$SV}{'3prime'}{from};$c<=$coords{$SV}{'3prime'}{to};$c++) {
            $annotation{$c} = '3prime';
        }
        if (exists $coords{$SV}{insert}) {
            for ($c=$coords{$SV}{insert}{from};$c<=$coords{$SV}{insert}{to};$c++) {
                $annotation{$c} = 'insert';
            }
            $coords{$SV}{internal}{len} = $coords{$SV}{SV}{len} - $coords{$SV}{'5prime'}{len} - $coords{$SV}{'3prime'}{len} - $coords{$SV}{'insert'}{len};
        } else {
            $coords{$SV}{internal}{len} = $coords{$SV}{SV}{len} - $coords{$SV}{'5prime'}{len} - $coords{$SV}{'3prime'}{len};
        }
    }
    if ($SV =~ /-D-/ || $targets eq 'exons') {
        for ($c=$coords{$SV}{SV}{from};$c<=$coords{$SV}{SV}{to};$c++) {
            unless (exists $annotation{$c}) {
                $annotation{$c} = 'internal';
            }
            ++$running;
            unless ($no_bins_just_len) {
                ++$counters{$annotation{$c}};
                $bin = ceil(($counters{$annotation{$c}}*$bins) / $coords{$SV}{$annotation{$c}}{len});
                $offset_bin = $bin + $bins_offset{$annotation{$c}};
                $bin_sizes{$offset_bin} = $coords{$SV}{$annotation{$c}}{len} / $bins;
                $bin2annotation{$offset_bin} = $annotation{$c};
            }
            foreach $siRNAtype (keys %{$siRNAs{$chr}{$c}}) {
                if (defined $siRNAs{$chr}{$c}{$siRNAtype}{hits}) {
                    unless ($no_bins_just_len) {
                        $per_bin{$offset_bin}{$siRNAtype}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits};
                        $per_bin{$offset_bin}{$siRNAtype}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp};
                    }
                    unless ($targets eq 'exons') {
                        $per_reg{$annotation{$c}}{$siRNAtype}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits};
                        $per_reg{$annotation{$c}}{$siRNAtype}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp};
                    }
                    $per_sv{$siRNAtype}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits};
                    $per_sv{$siRNAtype}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp};
                }
            }
            foreach $methtype (keys %{$meth{$chr}{$c}}) {
                $per_bin{$offset_bin}{$methtype} += $meth{$chr}{$c}{$methtype};
                $per_reg{$annotation{$c}}{$methtype} += $meth{$chr}{$c}{$methtype};
                #$per_sv{$methtype} += $meth{$chr}{$c}{$methtype};
            }
       }
    } elsif ($SV =~ /-P-/) {
        unless ($no_bins_just_len) {
            $prosimo = -1;
        } else {
            $prosimo = 1;
        }
        for ($c=$coords{$SV}{SV}{to};$c>=$coords{$SV}{SV}{from};$c--) {
            unless (exists $annotation{$c}) {
                $annotation{$c} = 'internal';
            }
            ++$running;
            unless ($no_bins_just_len) {
                ++$counters{$annotation{$c}};
                $bin = ceil(($counters{$annotation{$c}}*$bins) / $coords{$SV}{$annotation{$c}}{len});
                $offset_bin = $bin + $bins_offset{$annotation{$c}};
                $bin_sizes{$offset_bin} = $coords{$SV}{$annotation{$c}}{len} / $bins;
                $bin2annotation{$offset_bin} = $annotation{$c};
            }
            foreach $siRNAtype (keys %{$siRNAs{$chr}{$c}}) {
                ### reversing sign for P SVs because of siRNA mapping on raw genomic DNA
                $siRNAtype2mem = $siRNAtype;
                $siRNAtype2mem =~ tr/\+\-/-+/;
                if (defined $siRNAs{$chr}{$c}{$siRNAtype}{hits}) {
                    ### reversing sign for P SVs because of siRNA mapping on raw genomic DNA
                    unless ($no_bins_just_len) {
                        $per_bin{$offset_bin}{$siRNAtype2mem}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits} * $prosimo;
                        $per_bin{$offset_bin}{$siRNAtype2mem}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp} * $prosimo;
                    }
                    $per_reg{$annotation{$c}}{$siRNAtype2mem}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits} * $prosimo;
                    $per_reg{$annotation{$c}}{$siRNAtype2mem}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp} * $prosimo;
                    $per_sv{$siRNAtype2mem}{hits} += $siRNAs{$chr}{$c}{$siRNAtype}{hits} * $prosimo;
                    $per_sv{$siRNAtype2mem}{exp} += $siRNAs{$chr}{$c}{$siRNAtype}{exp} * $prosimo;
                }
            }
            foreach $methtype (keys %{$meth{$chr}{$c}}) {
                $per_bin{$offset_bin}{$methtype} += $meth{$chr}{$c}{$methtype};
                $per_reg{$annotation{$c}}{$methtype} += $meth{$chr}{$c}{$methtype};
                #$per_sv{$methtype} += $meth{$chr}{$c}{$methtype};
            }
        }
    }
    #undef %vec2print;
    unless ($no_bins_just_len) {
        for ($b=1;$b<=$maxbins;$b++) {
            if (defined $bin2annotation{$b}) {
                $b2print = $b - (($regions2sort{$bin2annotation{$b}}-1)*100);
                #foreach $methtype (sort keys %methtypes) {
                #    #$methtype =~ /([CGH]+)([\+-])/;
                #    $methtype =~ /([CGH]+)/;
                #    $motivo = $1;
                #    #$strand = $2;
                #    #$strand = '';
                #    $methtype_m = $methtype . 'm';
                #    $methtype_u = $methtype . 'u';
                #    if (defined $per_bin{$b}{$methtype_m} || defined $per_bin{$b}{$methtype_u}) {
                #        #$norm_content = sprintf "%.15f", $per_bin{$b}{$methtype}/$bin_sizes{$b};
                #        
                #        unless (defined $per_bin{$b}{$methtype_m}) {
                #            $per_bin{$b}{$methtype_m} = 0;
                #        }
                #        unless (defined $per_bin{$b}{$methtype_u}) {
                #            $per_bin{$b}{$methtype_u} = 0;
                #        }
                #        $norm_cover = 0;
                #        $norm_m = 0;
                #        $norm_u = 0;
                #        #$cover_content = 0;
                #        $m_cover = 0;
                #        $u_cover = 0;
                #        $norm_cover = sprintf "%.15f", ($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u})/$bin_sizes{$b};
                #        $norm_m = sprintf "%.15f", $per_bin{$b}{$methtype_m}/$bin_sizes{$b};
                #        $norm_u = sprintf "%.15f", $per_bin{$b}{$methtype_u}/$bin_sizes{$b};
                #        if ($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u} > 0) {
                #            #$cover_content = sprintf "%.15f", ($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u})/$per_bin{$b}{$methtype};
                #            $m_cover = sprintf "%.15f", $per_bin{$b}{$methtype_m}/($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u});
                #            $u_cover = sprintf "%.15f", $per_bin{$b}{$methtype_u}/($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u});
                #            print METHBIN "$SV\t$fams{$SV}\tdisinfe\t$ages{$SV}\t$age_groups{$SV}\t$generations{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_m\t$motivo\t$per_bin{$b}{$methtype_m}\t$per_bin{$b}{$methtype_u}\t$m_cover\n";
                #        }
                #        #if ($strand eq '-') {
                #        #    $norm_content = -$norm_content;
                #        #    $norm_cover = -$norm_cover;
                #        #    $cover_content = -$cover_content;
                #        #    $norm_m = -$norm_m;
                #        #    $m_cover = -$m_cover;
                #        #    $norm_u = -$norm_u;
                #        #    $u_cover = -$u_cover;
                #        #}
                #        #print METHBIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_content\t$motivo\t$motivo$strand\t$per_bin{$b}{$methtype}\t$norm_content\t$norm_content\n";
                #        #if ($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u} > 0) {
                #        #    print METHBIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_cover\t$motivo\t$motivo$strand\t".($per_bin{$b}{$methtype_m}+$per_bin{$b}{$methtype_u})."\t$norm_cover\t$cover_content\n";
                #        #    print METHBIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_m\t$motivo\t$motivo$strand\t$per_bin{$b}{$methtype_m}\t$norm_m\t$m_cover\n";
                #        #    print METHBIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_u\t$motivo\t$motivo$strand\t$per_bin{$b}{$methtype_u}\t$norm_u\t$u_cover\n";
                #        #}
                #    #} else {
                #    #    print METHBIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\tmeth_content\t$motivo\t$motivo$strand\t0\t0\t0\n";
                #    }
                #}
                foreach $siRNAtype (sort keys %siRNAtypes) {
                    if (defined $per_bin{$b}{$siRNAtype}) {
                        $norm_exp = sprintf "%.15f", $per_bin{$b}{$siRNAtype}{exp}/$bin_sizes{$b};
                        $norm_hits = sprintf "%.15f", $per_bin{$b}{$siRNAtype}{hits}/$bin_sizes{$b};
                        $norm_ave_exp = sprintf "%.15f", ($per_bin{$b}{$siRNAtype}{exp}/$bin_sizes{$b}) / ($per_bin{$b}{$siRNAtype}{hits}/$bin_sizes{$b});
                        if ($norm_exp < 0) {
                            $norm_ave_exp = -$norm_ave_exp;
                        }
                        #$rpkm_exp = sprintf "%.15f", ($per_bin{$b}{$siRNAtype}{exp}*1000000)/($siRNAtotals{exp}*$bin_sizes{$b});
                        #$rpkm_hits = sprintf "%.15f", ($per_bin{$b}{$siRNAtype}{hits}*1000000)/($siRNAtotals{hits}*$bin_sizes{$b});
                        $per_bin{$b}{$siRNAtype}{exp} = sprintf "%.15f", $per_bin{$b}{$siRNAtype}{exp};
                        $per_bin{$b}{$siRNAtype}{hits} = sprintf "%.15f", $per_bin{$b}{$siRNAtype}{hits};
                        print BIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\t$siRNAtypes{$siRNAtype}\t$siRNAtype\t$per_bin{$b}{$siRNAtype}{hits}\t$norm_hits\t$per_bin{$b}{$siRNAtype}{exp}\t$norm_exp\t$norm_ave_exp\t$orfis{$SV}\n";
                    } else {
                        print BIN "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$b2print\t$regions2sort{$bin2annotation{$b}}--$bin2annotation{$b}\t$siRNAtypes{$siRNAtype}\t$siRNAtype\t0\t0\t0\t0\t0\t$orfis{$SV}\n";
                    }
                }
            }
        }
    }
    if ($targets eq 'SVs') {
        foreach $region (sort keys %per_reg) {
            #foreach $methtype (sort keys %methtypes) {
            #    $methtype =~ /([CGH]+)([\+-])/;
            #    $motivo = $1;
            #    $strand = $2;
            #    if (defined $per_reg{$region}{$methtype}) {
            #        $norm_content = sprintf "%.15f", $per_reg{$region}{$methtype}/$coords{$SV}{$region}{len};
            #        
            #        $methtype_m = $methtype . 'm';
            #        $methtype_u = $methtype . 'u';
            #        unless (defined $per_reg{$region}{$methtype_m}) {
            #            $per_reg{$region}{$methtype_m} = 0;
            #        }
            #        unless (defined $per_reg{$region}{$methtype_u}) {
            #            $per_reg{$region}{$methtype_u} = 0;
            #        }
            #        $norm_cover = 0;
            #        $norm_m = 0;
            #        $norm_u = 0;
            #        $cover_content = 0;
            #        $m_cover = 0;
            #        $u_cover = 0;
            #        $norm_cover = sprintf "%.15f", ($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u})/$coords{$SV}{$region}{len};
            #        $norm_m = sprintf "%.15f", $per_reg{$region}{$methtype_m}/$coords{$SV}{$region}{len};
            #        $norm_u = sprintf "%.15f", $per_reg{$region}{$methtype_u}/$coords{$SV}{$region}{len};
            #        if ($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u} > 0) {
            #            $cover_content = sprintf "%.15f", ($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u})/$per_reg{$region}{$methtype};
            #            $m_cover = sprintf "%.15f", $per_reg{$region}{$methtype_m}/($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u});
            #            $u_cover = sprintf "%.15f", $per_reg{$region}{$methtype_u}/($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u});
            #        }
            #        if ($strand eq '-') {
            #            $norm_content = -$norm_content;
            #            $norm_cover = -$norm_cover;
            #            $cover_content = -$cover_content;
            #            $norm_m = -$norm_m;
            #            $m_cover = -$m_cover;
            #            $norm_u = -$norm_u;
            #            $u_cover = -$u_cover;
            #        }
            #        print METHREG "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\tmeth_content\t$motivo\t$motivo$strand\t$per_reg{$region}{$methtype}\t$norm_content\t$norm_content\n";
            #        if ($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u} > 0) {
            #            print METHREG "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\tmeth_cover\t$motivo\t$motivo$strand\t".($per_reg{$region}{$methtype_m}+$per_reg{$region}{$methtype_u})."\t$norm_cover\t$cover_content\n";
            #            print METHREG "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\tmeth_m\t$motivo\t$motivo$strand\t$per_reg{$region}{$methtype_m}\t$norm_m\t$m_cover\n";
            #            print METHREG "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\tmeth_u\t$motivo\t$motivo$strand\t$per_reg{$region}{$methtype_u}\t$norm_u\t$u_cover\n";
            #        }
            #    } else {
            #        print METHREG "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\tmeth_content\t$motivo\t$motivo$strand\t0\t0\t0\n";
            #    }
            #}
            # Write one REG row per siRNA type for the current region of $SV.
            # Value columns, in order: hits, hits/len, exp, exp/len and the
            # per-hit expression ratio. Types with no entry in %per_reg get an
            # all-zero row so every region/type combination is present.
            foreach $siRNAtype (sort keys %siRNAtypes) {
                # Leading annotation columns shared by both kinds of row.
                my $row_prefix = "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\t$regions2sort{$region}--$region\t$siRNAtypes{$siRNAtype}\t$siRNAtype";

                my $tally = $per_reg{$region}{$siRNAtype};
                unless (defined $tally) {
                    print REG "$row_prefix\t0\t0\t0\t0\t0\t$orfis{$SV}\n";
                    next;
                }

                # Without a usable length nothing can be normalised. Abort as
                # before, but say which SV/region is at fault instead of a bare
                # "Illegal division by zero".
                my $region_len = $coords{$SV}{$region}{len};
                unless (defined $region_len && $region_len != 0) {
                    die "Cannot normalise siRNA counts for region '$region' of '$SV': region length is missing or zero";
                }

                my $exp_density = $tally->{exp} / $region_len;
                my $hit_density = $tally->{hits} / $region_len;
                $norm_exp  = sprintf "%.15f", $exp_density;
                $norm_hits = sprintf "%.15f", $hit_density;

                # With zero hits there is no per-hit average; report 0 (as the
                # all-zero rows do) rather than dividing by zero.
                $norm_ave_exp = sprintf "%.15f", ($hit_density != 0 ? $exp_density / $hit_density : 0);

                # Flip the sign of the ratio whenever the normalised expression
                # is negative. The negation is arithmetic, so a flipped value is
                # printed as a plain number rather than with 15 fixed decimals.
                if ($norm_exp < 0) {
                    $norm_ave_exp = -$norm_ave_exp;
                }

                # The raw totals are stored back in fixed-point form, exactly as
                # they are printed.
                $tally->{exp}  = sprintf "%.15f", $tally->{exp};
                $tally->{hits} = sprintf "%.15f", $tally->{hits};
                print REG "$row_prefix\t$tally->{hits}\t$norm_hits\t$tally->{exp}\t$norm_exp\t$norm_ave_exp\t$orfis{$SV}\n";
            }
        }
    }
    # Write one SV row per siRNA type for the whole element (region label
    # "FL"). Value columns, in order: hits, hits/len, exp, exp/len and the
    # per-hit expression ratio. Types with no entry in %per_sv get an
    # all-zero row.
    foreach $siRNAtype (sort keys %siRNAtypes) {
        # Leading annotation columns shared by both kinds of row.
        my $row_prefix = "$inno\t$groups{$SV}\t$SV\t$fams{$SV}\t$ages{$SV}\t$agegroup\t$envs{$SV}\t$d2c{$SV}\tFL\t$siRNAtypes{$siRNAtype}\t$siRNAtype";

        my $tally = $per_sv{$siRNAtype};
        unless (defined $tally) {
            print SV "$row_prefix\t0\t0\t0\t0\t0\t$orfis{$SV}\n";
            next;
        }

        # Without a usable length nothing can be normalised. Abort as before,
        # but name the offending SV instead of a bare "Illegal division by
        # zero".
        my $sv_len = $coords{$SV}{SV}{len};
        unless (defined $sv_len && $sv_len != 0) {
            die "Cannot normalise siRNA counts for '$SV': full-length size is missing or zero";
        }

        my $exp_density = $tally->{exp} / $sv_len;
        my $hit_density = $tally->{hits} / $sv_len;
        $norm_exp  = sprintf "%.15f", $exp_density;
        $norm_hits = sprintf "%.15f", $hit_density;

        # With zero hits there is no per-hit average; report 0 (as the
        # all-zero rows do) rather than dividing by zero.
        $norm_ave_exp = sprintf "%.15f", ($hit_density != 0 ? $exp_density / $hit_density : 0);

        # Flip the sign of the ratio whenever the normalised expression is
        # negative. The negation is arithmetic, so a flipped value is printed
        # as a plain number rather than with 15 fixed decimals.
        if ($norm_exp < 0) {
            $norm_ave_exp = -$norm_ave_exp;
        }

        # The raw totals are stored back in fixed-point form, exactly as they
        # are printed.
        $tally->{exp}  = sprintf "%.15f", $tally->{exp};
        $tally->{hits} = sprintf "%.15f", $tally->{hits};
        print SV "$row_prefix\t$tally->{hits}\t$norm_hits\t$tally->{exp}\t$norm_exp\t$norm_ave_exp\t$orfis{$SV}\n";
    }
    # All rows for this SV have been written by this point, so drop its entry
    # from %coords (presumably to keep memory bounded over many SVs; confirm
    # that nothing later in the loop needs it).
    delete $coords{$SV};
}
# The closes for the methylation handles are disabled, as are the METHREG
# prints in the per-region section above.
#close METHBIN;
#close METHREG;

# End of script.
# NOTE(review): no explicit close of the BIN/REG/SV output handles is visible
# here; any that are open are closed implicitly at exit, so a failed final
# flush would go unreported. Confirm whether that is acceptable.
exit;
