#!/usr/bin/perl -w

$| = 1;

use POSIX;
use List::Util qw(shuffle);
use Sort::Naturally;


if (defined $ARGV[0]) {

    # Read the SV list named by the first command-line argument.
    # A line starting with "Ji" or "Opie" sets the current family; every
    # following line starting with "Zmay" is whitespace-split and stored
    # whole in %looper4random, keyed by family, second column and first
    # column. The second column is later looked up as a_newborn / b_midaged
    # / c_veryold, so it is presumably the generation label and the first
    # column the SV id -- confirm against the input file.
    #
    # Three-argument open keeps a file name that starts with '>' or ends
    # with '|' from being interpreted as a mode or a pipe.
    open (my $list_fh, '<', $ARGV[0]) or die "\tcannot read $ARGV[0]: $!\n";
    while (<$list_fh>) {
        next unless /\w/;
        chomp;
        if (/^(Ji|Opie)/) {
            # $fam stays a package variable on purpose: it has to survive
            # across iterations of this loop.
            $fam = $1;
        } elsif (/^Zmay/) {
            @tabs = split /\s+/;
            # An entry before any family header, or one without a second
            # column, would otherwise be filed under an undefined hash key.
            if (!defined $fam || @tabs < 2) {
                warn "\tskipping line $. of $ARGV[0]: no family header or no second column\n";
                next;
            }
            $looper4random{$fam}{$tabs[1]}{$tabs[0]} = $_;
        }
    }
    close $list_fh;
    # Keep a random subset of at most 30 SVs per family and generation.
    # %looper receives the chosen entries; %zombies maps each chosen SV id
    # to its family so that later lookups can test membership.
    #
    # The previous rejection-sampling loop drew keys until 30 distinct ones
    # had been collected, which never terminates when fewer than 30 SVs are
    # available. Shuffling once and truncating yields the same kind of
    # uniformly random subset and always finishes.
    foreach my $fam (keys %looper4random) {
        foreach my $generation (keys %{$looper4random{$fam}}) {
            my $pool = $looper4random{$fam}{$generation};
            foreach my $SV (sample_distinct_keys($pool, 30)) {
                $looper{$fam}{$generation}{$SV} = $pool->{$SV};
                $zombies{$SV} = $fam;
            }
        }
    }

    # Return up to $limit distinct keys of the hash referenced by $pool,
    # chosen uniformly at random. When the hash has $limit keys or fewer,
    # all of them are returned (in random order).
    sub sample_distinct_keys {
        my ($pool, $limit) = @_;
        my @picked = shuffle(keys %$pool);
        splice(@picked, $limit) if @picked > $limit;
        return @picked;
    }
    # Report on STDOUT how many SVs were kept for every family/generation
    # pair, one tab-separated line each, in sorted order.
    for my $family_name (sort keys %looper) {
        my $per_generation = $looper{$family_name};
        for my $generation_name (sort keys %$per_generation) {
            my $kept = scalar keys %{ $per_generation->{$generation_name} };
            print "$family_name\t$generation_name\t$kept\n";
        }
    }
    # Read the tab-separated annotation file named by the second argument
    # and count, per siRNA (column 1) and sampled SV (column 4), the hits
    # whose column 6 contains "prime". The innermost key is column 7
    # (called "bin" elsewhere in this script).
    defined $ARGV[1] or die "\tmissing second argument: annotation file to read\n";
    open (my $anno_fh, '<', $ARGV[1]) or die "\tcannot read $ARGV[1]: $!\n";
    while (<$anno_fh>) {
        next unless /\w/;
        chomp;
        @tabs = split /\t/;
        # Lines with fewer than six columns can never match the filter
        # below; skipping them early avoids undefined-value warnings.
        next unless defined $tabs[5];
        next unless defined $zombies{$tabs[3]} && $tabs[5] =~ /prime/;
        # split drops trailing empty fields, so an empty seventh column
        # arrives as undef; it is counted under the empty-string key, which
        # is the key an undefined value would have produced anyway.
        my $bin = defined $tabs[6] ? $tabs[6] : '';
        ++$siRNAs{$tabs[0]}{$tabs[3]}{$bin};
    }
    close $anno_fh;

    # Register every tag that can be reported: each non-empty combination
    # of the three generation labels, written as a space-separated list in
    # sorted order.
    $all_siRNAtags{$_} = 1 for (
        'a_newborn',
        'a_newborn b_midaged',
        'a_newborn b_midaged c_veryold',
        'a_newborn c_veryold',
        'b_midaged',
        'b_midaged c_veryold',
        'c_veryold',
    );

    # For every family, walk all (newborn, midaged, veryold) triples of the
    # sampled SVs. For each triple, every siRNA that hits at least one of
    # the three SVs is tagged with the sorted, space-separated list of the
    # generations it hits. One row per known tag is then written with:
    #   family, flag, percent of tagged siRNAs carrying the tag,
    #   number of siRNAs carrying the tag, number of tagged siRNAs,
    #   the literal "unique to", the tag, and the three SV ids.
    # The flag is only set for the tag 'a_newborn c_veryold': '>' when the
    # percentage is at least 10, '_' otherwise; all other tags get ' '.
    #
    # The script cannot produce anything useful without its output file,
    # so a failed open is fatal instead of merely printed.
    my $bag_file = "$ARGV[0].$ARGV[1].$$.bagged";
    open (my $bag_fh, '>', $bag_file) or die "\tcannot create $bag_file: $!\n";
    foreach my $fam (sort keys %looper) {
        my $newborns = 0;
        foreach my $newborn (sort keys %{$looper{$fam}{a_newborn}}) {
            ++$newborns;
            # Progress indicator on STDOUT.
            print "$fam $newborns\n";
            foreach my $midaged (sort keys %{$looper{$fam}{b_midaged}}) {
                foreach my $veryold (sort keys %{$looper{$fam}{c_veryold}}) {

                    # Generation of each member of the current triple.
                    my %generation_of = (
                        $newborn => 'a_newborn',
                        $midaged => 'b_midaged',
                        $veryold => 'c_veryold',
                    );

                    my %siRNAs_with_tag;    # tag => { siRNA => 1 }
                    my %tagged;             # siRNA => 1 for every tagged siRNA
                    foreach my $siRNA (keys %siRNAs) {
                        # Only the three triple members matter, so probe
                        # them directly instead of scanning every SV the
                        # siRNA hits.
                        my %hit_generation;
                        foreach my $member (keys %generation_of) {
                            $hit_generation{$generation_of{$member}} = 1
                                if exists $siRNAs{$siRNA}{$member};
                        }
                        next unless %hit_generation;
                        my $tag = join ' ', sort keys %hit_generation;
                        $siRNAs_with_tag{$tag}{$siRNA} = 1;
                        $tagged{$siRNA} = 1;
                    }

                    my $tagged_total = scalar keys %tagged;
                    foreach my $tag (sort keys %all_siRNAtags) {
                        my $tag_total = exists $siRNAs_with_tag{$tag}
                                      ? scalar keys %{$siRNAs_with_tag{$tag}}
                                      : 0;
                        # $tag_total > 0 implies $tagged_total > 0, so the
                        # division is safe.
                        my $percent = $tag_total
                                    ? sprintf("%.0f", $tag_total * 100 / $tagged_total)
                                    : 0;
                        my $flag = ' ';
                        if ($tag eq 'a_newborn c_veryold') {
                            $flag = $percent >= 10 ? '>' : '_';
                        }
                        print {$bag_fh} "$fam\t$flag\t$percent\t$tag_total\t$tagged_total\tunique to\t$tag\t$newborn $midaged $veryold\n";
                    }
                }
            }
        }
    }
    # Buffered write errors only surface when the handle is closed.
    close($bag_fh) or die "\tcannot finish writing $bag_file: $!\n";

} else {

    # Optional input: when the file exists, remember the first tab-separated
    # column of every non-blank line as a key of %clean.
    $disinfected = '6456_disinfected.SVs';
    if (-e $disinfected) {
        unless (open (DISINFECTED, $disinfected)) {
            print "\tcannot read $disinfected\n";
        }
        while (defined(my $row = <DISINFECTED>)) {
            next unless $row =~ /\w/;
            chomp $row;
            my ($first_column) = split /\t/, $row;
            $clean{$first_column} = 1;
        }
        close DISINFECTED;
    }
    # Optional input: when SV2reject exists, mark the first tab-separated
    # column of every non-blank line as 'reject' in %lepra.
    if (-e "SV2reject") {
        unless (open (LEPRA, "SV2reject")) {
            print "\tcannot read SV2reject\n";
        }
        while (defined(my $row = <LEPRA>)) {
            next unless $row =~ /\w/;
            chomp $row;
            my ($first_column) = split /\t/, $row;
            $lepra{$first_column} = 'reject';
        }
        close LEPRA;
    }
    # Build one fixed-width summary string per record of the age table,
    # keyed by the first column. Lines starting with '#' are skipped.
    # The string shows column 8 followed by columns 2..6 behind the labels
    # "a:", "*:", "-:", "s:" and "i:".
    unless (open (AGE, "Zmay_chr_ALL.fasta.clean.MASiVE.age")) {
        print "\tcannot read\n";
    }
    while (defined(my $row = <AGE>)) {
        next if $row =~ /^#/;
        chomp $row;
        my @col = split /\t/, $row;
        $age{$col[0]} = sprintf(
            "%-7s %-7s %-8s %-7s %-7s %-7s",
            $col[7],
            "a:$col[1]",
            "*: $col[2]",
            "-: $col[3]",
            "s: $col[4]",
            "i: $col[5]",
        );
    }
    close AGE;
    # Append columns 3 and 4 of the indel table (separated by one space) to
    # the summary string stored in %age under the first column. Lines
    # starting with '#' are skipped.
    unless (open (INDELS, "Zmay_chr_ALL.fasta.clean.MASiVE.age.indels")) {
        print "\tcannot read\n";
    }
    while (defined(my $row = <INDELS>)) {
        next if $row =~ /^#/;
        chomp $row;
        my @col = split /\t/, $row;
        $age{$col[0]} .= "$col[2] $col[3]";
    }
    close INDELS;
    # Load the per-SV siRNA counts, keeping only rows whose fourth column is
    # exactly '24'. The value (column 5) is stored under
    # {column 1}{column 3}{24}. The header line starting with "SVid" is
    # skipped.
    unless (open (SIRNACOUNTS, "conchie.flt.NOV13.212224.mod.anno.siRNAs_on_SVdomains_count")) {
        print "\tcannot read conchie.flt.NOV13.212224.mod.anno.siRNAs_on_SVdomains_count\n";
    }
    while (defined(my $row = <SIRNACOUNTS>)) {
        next if $row =~ /^SVid/;
        chomp $row;
        my @col = split /\t/, $row;
        next unless $col[3] eq '24';
        $siRNAcounts{$col[0]}{$col[2]}{$col[3]} = $col[4];
    }
    close SIRNACOUNTS;
    # Index the annotation rows that hit a clean SV (column 4 present in
    # %clean), whose column 6 contains "prime" and whose column 5 is the
    # family Ji or Opie:
    #   %siRNAcounts_det : SV -> region (column 6) -> siRNA (column 1) -> column 7 => count
    #   %siRNAmem        : siRNA -> SV -> column 7 => 1
    unless (open (MODANNO, "conchie.flt.NOV13.24.mod.anno.6456_disinfected")) {
        print "\tcannot read\n";
    }
    while (defined(my $row = <MODANNO>)) {
        next unless $row =~ /\w/;
        chomp $row;
        my @col = split /\t/, $row;
        next unless defined $clean{$col[3]};
        next unless $col[5] =~ /prime/;
        next unless $col[4] eq 'Ji' || $col[4] eq 'Opie';
        ++$siRNAcounts_det{$col[3]}{$col[5]}{$col[0]}{$col[6]};
        $siRNAmem{$col[0]}{$col[3]}{$col[6]} = 1;
    }
    close MODANNO;

    # Per-generation, per-family constants for the keys LTR and siRNA, and
    # a per-generation plus/minus value. Neither table is read anywhere in
    # the visible part of this script.
    %filters = (
        a_newborn => {
            Ji   => { LTR => 1330, siRNA => 480 },
            Opie => { LTR => 1260, siRNA => 350 },
        },
        b_midaged => {
            Ji   => { LTR => 1220, siRNA => 395 },
            Opie => { LTR => 1245, siRNA => 180 },
        },
        c_veryold => {
            Ji   => { LTR => 1290, siRNA => 670 },
            Opie => { LTR => 1255, siRNA => 460 },
        },
    );
    %plusminus = (
        a_newborn => 10,
        b_midaged => 20,
        c_veryold => 30,
    );
    # Load the MASiVE matrix and build %zombies (family -> generation ->
    # {SV}/{siRNA}) for clean Ji/Opie SVs. Lines starting with '#' are
    # skipped. Columns used below (0-based index into @tabs):
    #    5  SV id              7  must contain 'D' or 'P'
    #   11  label, stripped of its family prefix/suffix before printing
    #   14  printed as "FL"   15  printed as "lLTR"   16  printed as "rLTR"
    #   39  value that selects the generation
    #   51  family name
    # A failed open only prints a message; the loop then reads nothing.
    open (MATRIX, "MASiVEdb.Zea_mays.matrix.NEW1") || print "\tcannot read MASiVEdb.Zea_mays.matrix.NEW1\n";
    while (<MATRIX>) {
        unless (/^#/) {
            chomp;
            @tabs = split /\t/;
            if ($clean{$tabs[5]}) {
                if ($tabs[7] =~ /(D|P)/ && ($tabs[51] eq 'Ji' || $tabs[51] eq 'Opie')) {
                    # Remember the family of every accepted SV and the set
                    # of SVs per family (used for totals in the report).
                    $fams{$tabs[5]} = $tabs[51];
                    $fam_counts{$tabs[51]}{$tabs[5]} = 1;
                    # Only five values of column 39 are used; everything
                    # else is ignored for the generation analysis.
                    if ($tabs[39] eq '0' || $tabs[39] eq '0.25' || $tabs[39] eq '1.5' || $tabs[39] eq '1.75' || $tabs[39] eq '>3') {
                        # '0' and '0.25' -> a_newborn, '>3' -> c_veryold,
                        # the remaining '1.5' and '1.75' -> b_midaged.
                        if ($tabs[39] =~ /^0/) {
                            $generation = 'a_newborn';
                        } elsif ($tabs[39] eq '>3') {
                            $generation = 'c_veryold';
                        } else {
                            $generation = 'b_midaged';
                        }
                        # Column 14 minus columns 15 and 16 (printed as
                        # "INT"), and the absolute difference between
                        # columns 15 and 16 (printed as "LTR+-").
                        $intlen  = $tabs[14] - $tabs[15] - $tabs[16];
                        $LTRdiff = abs($tabs[15] - $tabs[16]);
                        $tabs[11] =~ s/^(Opie|Ji) RT--Zmaychr//;
                        $tabs[11] =~ s/--(Opie|Ji)$//;
                        # One formatted, newline-terminated report line per SV.
                        # NOTE: the two keys(%{...}) calls autovivify the
                        # '5prime' and '3prime' entries of
                        # $siRNAcounts_det{SV}, so both regions exist for
                        # every SV that reaches this point, even when empty.
                        $zombies{$tabs[51]}{$generation}{SV}{$tabs[5]} = sprintf "%-24s %-10s %-10s %-10s %-10s %-10s %-10s | %-10s %-11s %-10s %-10s | %s | %s\n", $tabs[5],$generation,"FL: $tabs[14]","lLTR: $tabs[15]","rLTR: $tabs[16]","LTR+-: $LTRdiff","INT: $intlen","FL24: ".$siRNAcounts{$tabs[5]}{FL}{24},"LTR24: ".$siRNAcounts{$tabs[5]}{LTR}{24},"5'24: ".keys(%{$siRNAcounts_det{$tabs[5]}{'5prime'}}),"3'24: ".keys(%{$siRNAcounts_det{$tabs[5]}{'3prime'}}),$age{$tabs[5]},$tabs[11];
                        # Record, per family and generation, which SVs each
                        # siRNA hits (in any region).
                        foreach $region (sort {$b cmp $a} keys %{$siRNAcounts_det{$tabs[5]}}) {
                            foreach $siRNA (keys %{$siRNAcounts_det{$tabs[5]}{$region}}) {
                                $zombies{$tabs[51]}{$generation}{siRNA}{$siRNA}{$tabs[5]} = 1;
                            }
                        }
                    }
                }
            } else {
                # SVs not listed in %clean are marked "rest"; this
                # overwrites a 'reject' mark set from SV2reject.
                $lepra{$tabs[5]} = "rest";
            }
        }
    }
    close MATRIX;
    # Nothing to report when no SV survived the filters above; say so
    # instead of dying with the default "Died" message.
    unless (%zombies) { die "\tno Ji/Opie SVs selected from the matrix, nothing to write\n"; }
    # Both reports are the only output of this branch, so failing to
    # create either file is fatal. The bareword handles are kept because
    # the code that follows prints to them by name.
    open (ZOMBIES, '>', "zombies.$$") or die "\tcannot create zombies.$$: $!\n";
    open (STEPWISE, '>', "zombies.$$.stepwise") or die "\tcannot create zombies.$$.stepwise: $!\n";
    # Write the two reports for every family that has at least three
    # generation keys in %zombies:
    #   ZOMBIES  - family name, per-generation SV/siRNA totals, and the
    #              formatted line of every SV;
    #   STEPWISE - one row per SV ("step"), showing what that SV adds and
    #              the running totals accumulated over the steps so far.
    foreach $fam (nsort keys %zombies) {
        if (keys(%{$zombies{$fam}}) >= 3) {
            $generations = '';
            $SVs = '';
            foreach $generation (sort {$a cmp $b} keys %{$zombies{$fam}}) {
                $generations .= "$generation\tSVs\t".keys(%{$zombies{$fam}{$generation}{SV}})."\tsiRNAs\t".keys(%{$zombies{$fam}{$generation}{siRNA}})."\n";

                # Number the SVs of this generation 1..n. The keys are not
                # sorted, so the numbering follows Perl's hash order.
                # NOTE(review): step order may therefore differ between
                # runs -- confirm this is acceptable.
                $count = 0;
                foreach $SV (keys %{$zombies{$fam}{$generation}{SV}}) {
                    ++$count;
                    $count2SV{$fam}{$generation}{$count} = $SV;
                    $SVs .= $zombies{$fam}{$generation}{SV}{$SV};
                    
                    # Regions are visited in descending string order, i.e.
                    # '5prime' before '3prime'.
                    foreach $region (sort {$b cmp $a} keys %{$siRNAcounts_det{$SV}}) {
                        foreach $siRNA (keys %{$siRNAcounts_det{$SV}{$region}}) {
                            # Credit the siRNA to this region unless it was
                            # already credited to the region processed just
                            # before it at this step.
                            if (!defined $other_region || (defined $other_region && !defined $accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{$other_region}{$siRNA})) {
                                $accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{$region}{$siRNA} = 1;
                            }
                            # Distinct innermost keys ("bins") seen at this step.
                            foreach $bin (keys %{$siRNAcounts_det{$SV}{$region}{$siRNA}}) {
                                $crosstalk{$fam}{$generation}{stepwise}{$count}{bin}{$bin} = 1;
                            }
                            # Every SV hit by the same siRNA, grouped by the
                            # family of that SV. %siRNAmem also contains the
                            # current SV, so its own family shows up here too.
                            # NOTE(review): %fams is only filled for SVs that
                            # passed the matrix filter, so $fams{...} may be
                            # undefined for some SVs found in %siRNAmem --
                            # verify.
                            foreach $all_other_SV (keys %{$siRNAmem{$siRNA}}) {
                                $crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$fams{$all_other_SV}}{SV}{$all_other_SV} = 1;
                                $crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$fams{$all_other_SV}}{siRNA}{$siRNA} = 1;
                                if (defined $siRNAmem{$siRNA}{$all_other_SV}) {
                                    foreach $bin (keys %{$siRNAmem{$siRNA}{$all_other_SV}}) {
                                        $crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$fams{$all_other_SV}}{bin}{$bin} = 1;
                                    }
                                }
                            }
                            # SVs of the other generations of the same
                            # family that share this siRNA.
                            foreach $other_generation (sort {$a cmp $b} keys %{$zombies{$fam}}) {
                                if ($generation ne $other_generation) {
                                    if (defined $zombies{$fam}{$other_generation}{siRNA}{$siRNA}) {
                                        foreach $other_SV (keys %{$zombies{$fam}{$other_generation}{siRNA}{$siRNA}}) {
                                            $crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}{$other_generation}{SV}{$other_SV} = 1;
                                            $crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}{$other_generation}{siRNA}{$siRNA} = 1;
                                        }
                                    }
                                }
                            }
                        }
                        $other_region = $region;
                    }
                    # Reset so the first region of the next SV is never
                    # compared against a region of this one.
                    undef $other_region;
                    
                }
                $SVs .= "\n";
            }
            print ZOMBIES "$fam\n$generations\n$SVs\n";

            # Emit the stepwise table. The {complete} hashes grow with each
            # step, so their sizes are running totals.
            foreach $generation (sort {$a cmp $b} keys %{$crosstalk{$fam}}) {
                foreach $count (sort {$a <=> $b} keys %{$crosstalk{$fam}{$generation}{stepwise}}) {
                    # Fold this step's siRNAs into the cumulative sets.
                    foreach $siRNA (keys %{$accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{'5prime'}}) { $accumulation{$fam}{$generation}{complete}{'5prime'}{$siRNA} = 1; $accumulation{$fam}{$generation}{complete}{complete}{$siRNA} = 1; }
                    foreach $siRNA (keys %{$accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{'3prime'}}) { $accumulation{$fam}{$generation}{complete}{'3prime'}{$siRNA} = 1; $accumulation{$fam}{$generation}{complete}{complete}{$siRNA} = 1; }
                    # Fixed columns: family, generation, step, SV id, the
                    # SV's {LTR}{24} count, bins at this step, 5prime siRNAs
                    # at this step, cumulative 5prime, all 3prime siRNAs of
                    # the SV, 3prime siRNAs credited at this step, cumulative
                    # 3prime, cumulative total, siRNAs of the generation.
                    print STEPWISE "$fam\t$generation\t$count\t$count2SV{$fam}{$generation}{$count}\t".$siRNAcounts{$count2SV{$fam}{$generation}{$count}}{LTR}{24}."\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{bin}})."\t".keys(%{$accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{'5prime'}})."\t".keys(%{$accumulation{$fam}{$generation}{complete}{'5prime'}})."\t".keys(%{$siRNAcounts_det{$count2SV{$fam}{$generation}{$count}}{'3prime'}})."\t".keys(%{$accumulation{$fam}{$generation}{stepwise}{$count}{siRNA}{'3prime'}})."\t".keys(%{$accumulation{$fam}{$generation}{complete}{'3prime'}})."\t".keys(%{$accumulation{$fam}{$generation}{complete}{complete}})."\t".keys(%{$zombies{$fam}{$generation}{siRNA}});
                    # Per other generation: name, shared siRNAs at this
                    # step, SVs at this step, cumulative SVs, SVs of that
                    # generation.
                    foreach $other_generation (sort {$a cmp $b} keys %{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}}) {
                        if ($generation ne $other_generation) {
                            foreach $other_SV (keys %{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}{$other_generation}{SV}}) { $crosstalk{$fam}{$generation}{complete}{other_gen}{$other_generation}{SV}{$other_SV} = 1; }
                            print STEPWISE "\t$other_generation\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}{$other_generation}{siRNA}})."\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_gen}{$other_generation}{SV}})."\t".keys(%{$crosstalk{$fam}{$generation}{complete}{other_gen}{$other_generation}{SV}})."\t".keys(%{$zombies{$fam}{$other_generation}{SV}});
                        }
                    }
                    # Per family reached through shared siRNAs: name, siRNAs
                    # at this step, bins at this step, cumulative bins, SVs
                    # at this step, cumulative SVs, SVs of that family.
                    foreach $other_fam (sort {$a cmp $b} keys %{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}}) {
                        foreach $other_fam_SV  (keys %{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$other_fam}{SV}})  { $crosstalk{$fam}{$generation}{complete}{other_fam}{$other_fam}{SV}{$other_fam_SV}   = 1; }
                        foreach $other_fam_bin (keys %{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$other_fam}{bin}}) { $crosstalk{$fam}{$generation}{complete}{other_fam}{$other_fam}{bin}{$other_fam_bin} = 1; }
                        print STEPWISE "\t$other_fam\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$other_fam}{siRNA}})."\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$other_fam}{bin}})."\t".keys(%{$crosstalk{$fam}{$generation}{complete}{other_fam}{$other_fam}{bin}})."\t".keys(%{$crosstalk{$fam}{$generation}{stepwise}{$count}{other_fam}{$other_fam}{SV}})."\t".keys(%{$crosstalk{$fam}{$generation}{complete}{other_fam}{$other_fam}{SV}})."\t".keys(%{$fam_counts{$other_fam}});
                    }
                    print STEPWISE "\n";
                }
            }
        }
    }
    # Buffered write errors (e.g. a full disk) are only reported when the
    # handle is closed, so the result of close must be checked for files
    # that were written.
    close(ZOMBIES)  or die "\tcannot finish writing zombies.$$: $!\n";
    close(STEPWISE) or die "\tcannot finish writing zombies.$$.stepwise: $!\n";
    
}
